blob: bee2e1284d68b293b380d56a4b393d1ee1723655 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
/* Version/copyright banner for the SRE engine. */
static const char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
/* name of this module, minus the leading underscore */
#if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif

#define SRE_PY_MODULE "re"

/* defining this one enables tracing */
#undef VERBOSE

/* -------------------------------------------------------------------- */

/* LOCAL(type) declares a file-local inline function returning `type`. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif

/* error codes (parenthesised so they are safe inside larger expressions) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */
#define SRE_ERROR_INTERRUPTED (-10) /* signal handler raised exception */

/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and
   expands to nothing otherwise; note the doubled parentheses. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
85
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000086/* -------------------------------------------------------------------- */
87/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000088
/* ASCII character predicates.  Each class test is preceded by a range
   comparison that short-circuits for any code point above the highest
   ASCII member of the class. */
#define SRE_IS_DIGIT(ch)\
    ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_WORD(ch)\
    ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +000097
/* Lowercase `ch` with Py_TOLOWER when it is below 128; larger values are
   returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
}
102
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
108
/* Lowercase `ch` with the C library's tolower() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}
113
/* Uppercase `ch` with the C library's toupper() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
}
118
/* unicode-specific character predicates */

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000126
/* Lowercase `ch` using Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER(ch);
}
131
/* Uppercase `ch` using Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOUPPER(ch);
}
136
Guido van Rossumb700df92000-03-31 14:59:30 +0000137LOCAL(int)
138sre_category(SRE_CODE category, unsigned int ch)
139{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000140 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000141
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000142 case SRE_CATEGORY_DIGIT:
143 return SRE_IS_DIGIT(ch);
144 case SRE_CATEGORY_NOT_DIGIT:
145 return !SRE_IS_DIGIT(ch);
146 case SRE_CATEGORY_SPACE:
147 return SRE_IS_SPACE(ch);
148 case SRE_CATEGORY_NOT_SPACE:
149 return !SRE_IS_SPACE(ch);
150 case SRE_CATEGORY_WORD:
151 return SRE_IS_WORD(ch);
152 case SRE_CATEGORY_NOT_WORD:
153 return !SRE_IS_WORD(ch);
154 case SRE_CATEGORY_LINEBREAK:
155 return SRE_IS_LINEBREAK(ch);
156 case SRE_CATEGORY_NOT_LINEBREAK:
157 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000159 case SRE_CATEGORY_LOC_WORD:
160 return SRE_LOC_IS_WORD(ch);
161 case SRE_CATEGORY_LOC_NOT_WORD:
162 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000163
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000164 case SRE_CATEGORY_UNI_DIGIT:
165 return SRE_UNI_IS_DIGIT(ch);
166 case SRE_CATEGORY_UNI_NOT_DIGIT:
167 return !SRE_UNI_IS_DIGIT(ch);
168 case SRE_CATEGORY_UNI_SPACE:
169 return SRE_UNI_IS_SPACE(ch);
170 case SRE_CATEGORY_UNI_NOT_SPACE:
171 return !SRE_UNI_IS_SPACE(ch);
172 case SRE_CATEGORY_UNI_WORD:
173 return SRE_UNI_IS_WORD(ch);
174 case SRE_CATEGORY_UNI_NOT_WORD:
175 return !SRE_UNI_IS_WORD(ch);
176 case SRE_CATEGORY_UNI_LINEBREAK:
177 return SRE_UNI_IS_LINEBREAK(ch);
178 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
179 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000180 }
181 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000182}
183
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300184LOCAL(int)
185char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
186{
187 return ch == pattern
188 || (SRE_CODE) sre_lower_locale(ch) == pattern
189 || (SRE_CODE) sre_upper_locale(ch) == pattern;
190}
191
192
Guido van Rossumb700df92000-03-31 14:59:30 +0000193/* helpers */
194
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000195static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000196data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000197{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000198 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000199 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000200 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000201 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000202 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000203}
204
205static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000206data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000207{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000208 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000209 minsize = state->data_stack_base+size;
210 cursize = state->data_stack_size;
211 if (cursize < minsize) {
212 void* stack;
213 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300214 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000215 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000217 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000218 return SRE_ERROR_MEMORY;
219 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000220 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000221 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000222 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000223 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000224}
225
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000226/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000227
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300228#define SRE_CHAR Py_UCS1
229#define SIZEOF_SRE_CHAR 1
230#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300231#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000232
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300233/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000234
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300235#define SRE_CHAR Py_UCS2
236#define SIZEOF_SRE_CHAR 2
237#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300238#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000239
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300240/* generate 32-bit unicode version */
241
242#define SRE_CHAR Py_UCS4
243#define SIZEOF_SRE_CHAR 4
244#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300245#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000246
247/* -------------------------------------------------------------------- */
248/* factories and destructors */
249
250/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100251static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300252static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000253
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300254
255/*[clinic input]
256module _sre
257class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
258class _sre.SRE_Match "MatchObject *" "&Match_Type"
259class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
260[clinic start generated code]*/
261/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
262
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700263static PyTypeObject Pattern_Type;
264static PyTypeObject Match_Type;
265static PyTypeObject Scanner_Type;
266
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300267/*[clinic input]
268_sre.getcodesize -> int
269[clinic start generated code]*/
270
271static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300272_sre_getcodesize_impl(PyObject *module)
273/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000274{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300275 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000276}
277
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300278/*[clinic input]
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300279_sre.ascii_iscased -> bool
280
281 character: int
282 /
283
284[clinic start generated code]*/
285
286static int
287_sre_ascii_iscased_impl(PyObject *module, int character)
288/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
289{
290 unsigned int ch = (unsigned int)character;
Sergey Fedoseev7f0d59f2018-09-12 17:49:09 +0500291 return ch < 128 && Py_ISALPHA(ch);
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300292}
293
294/*[clinic input]
295_sre.unicode_iscased -> bool
296
297 character: int
298 /
299
300[clinic start generated code]*/
301
302static int
303_sre_unicode_iscased_impl(PyObject *module, int character)
304/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
305{
306 unsigned int ch = (unsigned int)character;
307 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
308}
309
310/*[clinic input]
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300311_sre.ascii_tolower -> int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300312
313 character: int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300314 /
315
316[clinic start generated code]*/
317
318static int
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300319_sre_ascii_tolower_impl(PyObject *module, int character)
320/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000321{
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300322 return sre_lower_ascii(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000323}
324
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300325/*[clinic input]
326_sre.unicode_tolower -> int
327
328 character: int
329 /
330
331[clinic start generated code]*/
332
333static int
334_sre_unicode_tolower_impl(PyObject *module, int character)
335/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
336{
337 return sre_lower_unicode(character);
338}
339
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000340LOCAL(void)
341state_reset(SRE_STATE* state)
342{
animalize4a7f44a2019-02-18 21:26:37 +0800343 /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000344 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000345
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000346 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000347 state->lastindex = -1;
348
349 state->repeat = NULL;
350
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000351 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000352}
353
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000354static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200355getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300356 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600357 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000358{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000359 /* given a python object, return a data pointer, a length (in
360 characters), and a character size. return NULL if the object
361 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000362
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000363 /* Unicode objects do not support the buffer API. So, get the data
364 directly instead. */
365 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200366 if (PyUnicode_READY(string) == -1)
367 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200369 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300370 *p_isbytes = 0;
371 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000372 }
373
Victor Stinner0058b862011-09-29 03:27:47 +0200374 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300375 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200376 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300377 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000378 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000379
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300380 *p_length = view->len;
381 *p_charsize = 1;
382 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000383
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300384 if (view->buf == NULL) {
385 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
386 PyBuffer_Release(view);
387 view->buf = NULL;
388 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000389 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300390 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000391}
392
393LOCAL(PyObject*)
394state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000395 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000396{
397 /* prepare state object */
398
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000399 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300400 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000401 void* ptr;
402
403 memset(state, 0, sizeof(SRE_STATE));
404
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300405 state->mark = PyMem_New(void *, pattern->groups * 2);
406 if (!state->mark) {
407 PyErr_NoMemory();
408 goto err;
409 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000410 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000411 state->lastindex = -1;
412
Benjamin Petersone48944b2012-03-07 14:50:25 -0600413 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300414 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000415 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600416 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000417
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300418 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600419 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200420 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600421 goto err;
422 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300423 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600424 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200425 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600426 goto err;
427 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000429 /* adjust boundaries */
430 if (start < 0)
431 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000432 else if (start > length)
433 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000434
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000435 if (end < 0)
436 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000437 else if (end > length)
438 end = length;
439
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300440 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000441 state->charsize = charsize;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200442 state->match_all = 0;
443 state->must_advance = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000444
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000445 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000446
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000447 state->start = (void*) ((char*) ptr + start * state->charsize);
448 state->end = (void*) ((char*) ptr + end * state->charsize);
449
450 Py_INCREF(string);
451 state->string = string;
452 state->pos = start;
453 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000454
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000455 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600456 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300457 PyMem_Del(state->mark);
458 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600459 if (state->buffer.buf)
460 PyBuffer_Release(&state->buffer);
461 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000462}
463
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000464LOCAL(void)
465state_fini(SRE_STATE* state)
466{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600467 if (state->buffer.buf)
468 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000469 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000470 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300471 PyMem_Del(state->mark);
472 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000473}
474
/* calculate offset from start of string: converts the pointer `member`
   into a character index relative to (state)->beginning */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
478
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000479LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300480getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300481 PyObject* string, Py_ssize_t start, Py_ssize_t end)
482{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300483 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300484 if (PyBytes_CheckExact(string) &&
485 start == 0 && end == PyBytes_GET_SIZE(string)) {
486 Py_INCREF(string);
487 return string;
488 }
489 return PyBytes_FromStringAndSize(
490 (const char *)ptr + start, end - start);
491 }
492 else {
493 return PyUnicode_Substring(string, start, end);
494 }
495}
496
497LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000498state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000499{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000500 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000501
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000502 index = (index - 1) * 2;
503
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000504 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000505 if (empty)
506 /* want empty string */
507 i = j = 0;
508 else {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200509 Py_RETURN_NONE;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000510 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000511 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000512 i = STATE_OFFSET(state, state->mark[index]);
513 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000514 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000515
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300516 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000517}
518
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000519static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100520pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000521{
522 switch (status) {
523 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400524 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000525 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400526 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000527 "maximum recursion limit exceeded"
528 );
529 break;
530 case SRE_ERROR_MEMORY:
531 PyErr_NoMemory();
532 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000533 case SRE_ERROR_INTERRUPTED:
534 /* An exception has already been raised, so let it fly */
535 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000536 default:
537 /* other error codes indicate compiler/engine bugs */
538 PyErr_SetString(
539 PyExc_RuntimeError,
540 "internal error in regular expression engine"
541 );
542 }
543}
544
Guido van Rossumb700df92000-03-31 14:59:30 +0000545static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000546pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000547{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000548 if (self->weakreflist != NULL)
549 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000550 Py_XDECREF(self->pattern);
551 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000552 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000553 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000554}
555
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300556LOCAL(Py_ssize_t)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200557sre_match(SRE_STATE* state, SRE_CODE* pattern)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300558{
559 if (state->charsize == 1)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200560 return sre_ucs1_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300561 if (state->charsize == 2)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200562 return sre_ucs2_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300563 assert(state->charsize == 4);
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200564 return sre_ucs4_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300565}
566
567LOCAL(Py_ssize_t)
568sre_search(SRE_STATE* state, SRE_CODE* pattern)
569{
570 if (state->charsize == 1)
571 return sre_ucs1_search(state, pattern);
572 if (state->charsize == 2)
573 return sre_ucs2_search(state, pattern);
574 assert(state->charsize == 4);
575 return sre_ucs4_search(state, pattern);
576}
577
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300578/*[clinic input]
579_sre.SRE_Pattern.match
580
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200581 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300582 pos: Py_ssize_t = 0
583 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300584
585Matches zero or more characters at the beginning of the string.
586[clinic start generated code]*/
587
Larry Hastings16c51912014-01-07 11:53:01 -0800588static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300589_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200590 Py_ssize_t pos, Py_ssize_t endpos)
591/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800592{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000593 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100594 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300595 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000596
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300597 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000598 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000599
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000600 state.ptr = state.start;
601
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000602 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
603
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200604 status = sre_match(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000606 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300607 if (PyErr_Occurred()) {
608 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000609 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300610 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000611
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300612 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000613 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300614 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000615}
616
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300617/*[clinic input]
618_sre.SRE_Pattern.fullmatch
619
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200620 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300621 pos: Py_ssize_t = 0
622 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300623
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300624Matches against all of the string.
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300625[clinic start generated code]*/
626
627static PyObject *
628_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200629 Py_ssize_t pos, Py_ssize_t endpos)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300630/*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200631{
632 SRE_STATE state;
633 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300634 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200635
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300636 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200637 return NULL;
638
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200639 state.ptr = state.start;
640
641 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
642
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200643 state.match_all = 1;
644 status = sre_match(&state, PatternObject_GetCode(self));
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200645
646 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300647 if (PyErr_Occurred()) {
648 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200649 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300650 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200651
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300652 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200653 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300654 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200655}
656
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300657/*[clinic input]
658_sre.SRE_Pattern.search
659
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200660 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300661 pos: Py_ssize_t = 0
662 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300663
664Scan through string looking for a match, and return a corresponding match object instance.
665
666Return None if no position in the string matches.
667[clinic start generated code]*/
668
669static PyObject *
670_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200671 Py_ssize_t pos, Py_ssize_t endpos)
672/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000673{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000674 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100675 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300676 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000677
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300678 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000679 return NULL;
680
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000681 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
682
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300683 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000684
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000685 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
686
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300687 if (PyErr_Occurred()) {
688 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000689 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300690 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000691
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300692 match = pattern_new_match(self, &state, status);
693 state_fini(&state);
694 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000695}
696
697static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200698call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000699{
700 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000701 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000702 PyObject* func;
703 PyObject* result;
704
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000705 if (!args)
706 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000707 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000708 if (!name)
709 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000710 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000711 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000712 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000713 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000714 func = PyObject_GetAttrString(mod, function);
715 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000716 if (!func)
717 return NULL;
718 result = PyObject_CallObject(func, args);
719 Py_DECREF(func);
720 Py_DECREF(args);
721 return result;
722}
723
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300724/*[clinic input]
725_sre.SRE_Pattern.findall
726
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200727 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300728 pos: Py_ssize_t = 0
729 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300730
731Return a list of all non-overlapping matches of pattern in string.
732[clinic start generated code]*/
733
734static PyObject *
735_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200736 Py_ssize_t pos, Py_ssize_t endpos)
737/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000738{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 SRE_STATE state;
740 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100741 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000742 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000743
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300744 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000746
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000747 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000748 if (!list) {
749 state_fini(&state);
750 return NULL;
751 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000752
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000753 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000754
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000755 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000756
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000757 state_reset(&state);
758
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000759 state.ptr = state.start;
760
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300761 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300762 if (PyErr_Occurred())
763 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000764
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000765 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000766 if (status == 0)
767 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000768 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000769 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000770 }
Tim Peters3d563502006-01-21 02:47:53 +0000771
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000772 /* don't bother to build a match object */
773 switch (self->groups) {
774 case 0:
775 b = STATE_OFFSET(&state, state.start);
776 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300777 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300778 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000779 if (!item)
780 goto error;
781 break;
782 case 1:
783 item = state_getslice(&state, 1, string, 1);
784 if (!item)
785 goto error;
786 break;
787 default:
788 item = PyTuple_New(self->groups);
789 if (!item)
790 goto error;
791 for (i = 0; i < self->groups; i++) {
792 PyObject* o = state_getslice(&state, i+1, string, 1);
793 if (!o) {
794 Py_DECREF(item);
795 goto error;
796 }
797 PyTuple_SET_ITEM(item, i, o);
798 }
799 break;
800 }
801
802 status = PyList_Append(list, item);
803 Py_DECREF(item);
804 if (status < 0)
805 goto error;
806
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200807 state.must_advance = (state.ptr == state.start);
808 state.start = state.ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000809 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000810
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000811 state_fini(&state);
812 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000813
814error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000815 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000816 state_fini(&state);
817 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000818
Guido van Rossumb700df92000-03-31 14:59:30 +0000819}
820
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300821/*[clinic input]
822_sre.SRE_Pattern.finditer
823
824 string: object
825 pos: Py_ssize_t = 0
826 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
827
828Return an iterator over all non-overlapping matches for the RE pattern in string.
829
830For each match, the iterator returns a match object.
831[clinic start generated code]*/
832
833static PyObject *
834_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
835 Py_ssize_t pos, Py_ssize_t endpos)
836/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000837{
838 PyObject* scanner;
839 PyObject* search;
840 PyObject* iterator;
841
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300842 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000843 if (!scanner)
844 return NULL;
845
846 search = PyObject_GetAttrString(scanner, "search");
847 Py_DECREF(scanner);
848 if (!search)
849 return NULL;
850
851 iterator = PyCallIter_New(search, Py_None);
852 Py_DECREF(search);
853
854 return iterator;
855}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000856
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300857/*[clinic input]
858_sre.SRE_Pattern.scanner
859
860 string: object
861 pos: Py_ssize_t = 0
862 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
863
864[clinic start generated code]*/
865
866static PyObject *
867_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
868 Py_ssize_t pos, Py_ssize_t endpos)
869/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
870{
871 return pattern_scanner(self, string, pos, endpos);
872}
873
874/*[clinic input]
875_sre.SRE_Pattern.split
876
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200877 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300878 maxsplit: Py_ssize_t = 0
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300879
880Split string by the occurrences of pattern.
881[clinic start generated code]*/
882
883static PyObject *
884_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200885 Py_ssize_t maxsplit)
886/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000887{
888 SRE_STATE state;
889 PyObject* list;
890 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100891 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000892 Py_ssize_t n;
893 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000894 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000895
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200896 assert(self->codesize != 0);
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200897
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300898 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000899 return NULL;
900
901 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000902 if (!list) {
903 state_fini(&state);
904 return NULL;
905 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000906
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000907 n = 0;
908 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000909
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000910 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000911
912 state_reset(&state);
913
914 state.ptr = state.start;
915
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300916 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300917 if (PyErr_Occurred())
918 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000919
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000920 if (status <= 0) {
921 if (status == 0)
922 break;
923 pattern_error(status);
924 goto error;
925 }
Tim Peters3d563502006-01-21 02:47:53 +0000926
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000927 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300928 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000929 string, STATE_OFFSET(&state, last),
930 STATE_OFFSET(&state, state.start)
931 );
932 if (!item)
933 goto error;
934 status = PyList_Append(list, item);
935 Py_DECREF(item);
936 if (status < 0)
937 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000938
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000939 /* add groups (if any) */
940 for (i = 0; i < self->groups; i++) {
941 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000942 if (!item)
943 goto error;
944 status = PyList_Append(list, item);
945 Py_DECREF(item);
946 if (status < 0)
947 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000948 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000949
950 n = n + 1;
Serhiy Storchakafbb490f2018-01-04 11:06:13 +0200951 state.must_advance = (state.ptr == state.start);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000952 last = state.start = state.ptr;
953
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000954 }
955
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000956 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300957 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000958 string, STATE_OFFSET(&state, last), state.endpos
959 );
960 if (!item)
961 goto error;
962 status = PyList_Append(list, item);
963 Py_DECREF(item);
964 if (status < 0)
965 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000966
967 state_fini(&state);
968 return list;
969
970error:
971 Py_DECREF(list);
972 state_fini(&state);
973 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000974
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000975}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000976
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000977static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000978pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000979 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000980{
981 SRE_STATE state;
982 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300983 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000984 PyObject* item;
985 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000986 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000987 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100988 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000989 Py_ssize_t n;
990 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300991 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000992 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600993 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000994
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000995 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000996 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000997 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000998 Py_INCREF(filter);
999 filter_is_callable = 1;
1000 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001001 /* if not callable, check if it's a literal string */
1002 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001003 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001004 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001005 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001006 if (charsize == 1)
1007 literal = memchr(ptr, '\\', n) == NULL;
1008 else
1009 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001010 } else {
1011 PyErr_Clear();
1012 literal = 0;
1013 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001014 if (view.buf)
1015 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001016 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001017 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001018 Py_INCREF(filter);
1019 filter_is_callable = 0;
1020 } else {
1021 /* not a literal; hand it over to the template compiler */
1022 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001023 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001024 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001025 );
1026 if (!filter)
1027 return NULL;
1028 filter_is_callable = PyCallable_Check(filter);
1029 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001030 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001031
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001032 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001033 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001034 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001035 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001036
1037 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001038 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001039 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001040 state_fini(&state);
1041 return NULL;
1042 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001043
1044 n = i = 0;
1045
1046 while (!count || n < count) {
1047
1048 state_reset(&state);
1049
1050 state.ptr = state.start;
1051
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001052 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001053 if (PyErr_Occurred())
1054 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001055
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001056 if (status <= 0) {
1057 if (status == 0)
1058 break;
1059 pattern_error(status);
1060 goto error;
1061 }
Tim Peters3d563502006-01-21 02:47:53 +00001062
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001063 b = STATE_OFFSET(&state, state.start);
1064 e = STATE_OFFSET(&state, state.ptr);
1065
1066 if (i < b) {
1067 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001068 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001069 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001070 if (!item)
1071 goto error;
1072 status = PyList_Append(list, item);
1073 Py_DECREF(item);
1074 if (status < 0)
1075 goto error;
1076
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001077 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001078
1079 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001080 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001081 match = pattern_new_match(self, &state, 1);
1082 if (!match)
1083 goto error;
Petr Viktorinffd97532020-02-11 17:46:57 +01001084 item = PyObject_CallOneArg(filter, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001085 Py_DECREF(match);
1086 if (!item)
1087 goto error;
1088 } else {
1089 /* filter is literal string */
1090 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001091 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001092 }
1093
1094 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001095 if (item != Py_None) {
1096 status = PyList_Append(list, item);
1097 Py_DECREF(item);
1098 if (status < 0)
1099 goto error;
1100 }
Tim Peters3d563502006-01-21 02:47:53 +00001101
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001102 i = e;
1103 n = n + 1;
Serhiy Storchakafbb490f2018-01-04 11:06:13 +02001104 state.must_advance = (state.ptr == state.start);
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001105 state.start = state.ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001106 }
1107
1108 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001109 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001110 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001111 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001112 if (!item)
1113 goto error;
1114 status = PyList_Append(list, item);
1115 Py_DECREF(item);
1116 if (status < 0)
1117 goto error;
1118 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001119
1120 state_fini(&state);
1121
Guido van Rossum4e173842001-12-07 04:25:10 +00001122 Py_DECREF(filter);
1123
Fredrik Lundhdac58492001-10-21 21:48:30 +00001124 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001125 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001126 if (!joiner) {
1127 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001128 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001129 }
1130 if (PyList_GET_SIZE(list) == 0) {
1131 Py_DECREF(list);
1132 item = joiner;
1133 }
1134 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001135 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001136 item = _PyBytes_Join(joiner, list);
1137 else
1138 item = PyUnicode_Join(joiner, list);
1139 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001140 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001141 if (!item)
1142 return NULL;
1143 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001144
1145 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001146 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001147
1148 return item;
1149
1150error:
1151 Py_DECREF(list);
1152 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001153 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001154 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001155
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001156}
1157
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001158/*[clinic input]
1159_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001160
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001161 repl: object
1162 string: object
1163 count: Py_ssize_t = 0
1164
1165Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1166[clinic start generated code]*/
1167
1168static PyObject *
1169_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1170 PyObject *string, Py_ssize_t count)
1171/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1172{
1173 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001174}
1175
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001176/*[clinic input]
1177_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001178
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001179 repl: object
1180 string: object
1181 count: Py_ssize_t = 0
1182
1183Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1184[clinic start generated code]*/
1185
1186static PyObject *
1187_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1188 PyObject *string, Py_ssize_t count)
1189/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1190{
1191 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001192}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001193
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001194/*[clinic input]
1195_sre.SRE_Pattern.__copy__
1196
1197[clinic start generated code]*/
1198
1199static PyObject *
1200_sre_SRE_Pattern___copy___impl(PatternObject *self)
1201/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001202{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001203 Py_INCREF(self);
1204 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001205}
1206
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001207/*[clinic input]
1208_sre.SRE_Pattern.__deepcopy__
1209
1210 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001211 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001212
1213[clinic start generated code]*/
1214
1215static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001216_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1217/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001218{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001219 Py_INCREF(self);
1220 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001221}
1222
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001223static PyObject *
1224pattern_repr(PatternObject *obj)
1225{
1226 static const struct {
1227 const char *name;
1228 int value;
1229 } flag_names[] = {
1230 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1231 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1232 {"re.LOCALE", SRE_FLAG_LOCALE},
1233 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1234 {"re.DOTALL", SRE_FLAG_DOTALL},
1235 {"re.UNICODE", SRE_FLAG_UNICODE},
1236 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1237 {"re.DEBUG", SRE_FLAG_DEBUG},
1238 {"re.ASCII", SRE_FLAG_ASCII},
1239 };
1240 PyObject *result = NULL;
1241 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001242 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001243 int flags = obj->flags;
1244
1245 /* Omit re.UNICODE for valid string patterns. */
1246 if (obj->isbytes == 0 &&
1247 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1248 SRE_FLAG_UNICODE)
1249 flags &= ~SRE_FLAG_UNICODE;
1250
1251 flag_items = PyList_New(0);
1252 if (!flag_items)
1253 return NULL;
1254
1255 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1256 if (flags & flag_names[i].value) {
1257 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1258 if (!item)
1259 goto done;
1260
1261 if (PyList_Append(flag_items, item) < 0) {
1262 Py_DECREF(item);
1263 goto done;
1264 }
1265 Py_DECREF(item);
1266 flags &= ~flag_names[i].value;
1267 }
1268 }
1269 if (flags) {
1270 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1271 if (!item)
1272 goto done;
1273
1274 if (PyList_Append(flag_items, item) < 0) {
1275 Py_DECREF(item);
1276 goto done;
1277 }
1278 Py_DECREF(item);
1279 }
1280
1281 if (PyList_Size(flag_items) > 0) {
1282 PyObject *flags_result;
1283 PyObject *sep = PyUnicode_FromString("|");
1284 if (!sep)
1285 goto done;
1286 flags_result = PyUnicode_Join(sep, flag_items);
1287 Py_DECREF(sep);
1288 if (!flags_result)
1289 goto done;
1290 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1291 obj->pattern, flags_result);
1292 Py_DECREF(flags_result);
1293 }
1294 else {
1295 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1296 }
1297
1298done:
1299 Py_DECREF(flag_items);
1300 return result;
1301}
1302
/* Defines pattern_doc, holding the docstring text given below. */
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03001303PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001304
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001305/* PatternObject's 'groupindex' method. */
1306static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02001307pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001308{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001309 if (self->groupindex == NULL)
1310 return PyDict_New();
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001311 return PyDictProxy_New(self->groupindex);
1312}
1313
/* Declared ahead of its definition because _sre_compile_impl() calls it;
   a zero return makes compilation fail there. */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001314static int _validate(PatternObject *self); /* Forward */
1315
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001316/*[clinic input]
1317_sre.compile
1318
1319 pattern: object
1320 flags: int
1321 code: object(subclass_of='&PyList_Type')
1322 groups: Py_ssize_t
Victor Stinner726a57d2016-11-22 23:04:39 +01001323 groupindex: object(subclass_of='&PyDict_Type')
1324 indexgroup: object(subclass_of='&PyTuple_Type')
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001325
1326[clinic start generated code]*/
1327
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001328static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001329_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001330 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1331 PyObject *indexgroup)
Victor Stinner726a57d2016-11-22 23:04:39 +01001332/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001333{
1334 /* "compile" pattern descriptor to pattern object */
1335
1336 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001337 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001338
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001339 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001340 /* coverity[ampersand_in_size] */
Victor Stinner92055202020-04-08 00:38:15 +02001341 self = PyObject_NewVar(PatternObject, &Pattern_Type, n);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001342 if (!self)
1343 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001344 self->weakreflist = NULL;
1345 self->pattern = NULL;
1346 self->groupindex = NULL;
1347 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001348
1349 self->codesize = n;
1350
1351 for (i = 0; i < n; i++) {
1352 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001353 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001354 self->code[i] = (SRE_CODE) value;
1355 if ((unsigned long) self->code[i] != value) {
1356 PyErr_SetString(PyExc_OverflowError,
1357 "regular expression code size limit exceeded");
1358 break;
1359 }
1360 }
1361
1362 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001363 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001364 return NULL;
1365 }
1366
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001367 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001368 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001369 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001370 else {
1371 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001372 int charsize;
1373 Py_buffer view;
1374 view.buf = NULL;
1375 if (!getstring(pattern, &p_length, &self->isbytes,
1376 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001377 Py_DECREF(self);
1378 return NULL;
1379 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001380 if (view.buf)
1381 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001382 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001383
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001384 Py_INCREF(pattern);
1385 self->pattern = pattern;
1386
1387 self->flags = flags;
1388
1389 self->groups = groups;
1390
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001391 if (PyDict_GET_SIZE(groupindex) > 0) {
1392 Py_INCREF(groupindex);
1393 self->groupindex = groupindex;
1394 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1395 Py_INCREF(indexgroup);
1396 self->indexgroup = indexgroup;
1397 }
1398 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001399
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001400 if (!_validate(self)) {
1401 Py_DECREF(self);
1402 return NULL;
1403 }
1404
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001405 return (PyObject*) self;
1406}
1407
Guido van Rossumb700df92000-03-31 14:59:30 +00001408/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001409/* Code validation */
1410
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1432
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the enclosing function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the enclosing function: they read
   and advance 'code', compare it against 'end', and store the fetched word
   in 'op', 'arg' or 'skip' respectively.  Each of them FAILs (returns 0
   from the enclosing function) when 'code' has already reached 'end'. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Fetch a skip count and check that the skip, reduced by 'adj', does not
   reach past 'end'.  The skip is measured from the position of the skip
   word itself, which is why the bound is checked before 'code' advances.
   'adj' is parenthesized so that any expression can be passed safely. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, (void *)(code+skip))); \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001473
1474static int
1475_validate_charset(SRE_CODE *code, SRE_CODE *end)
1476{
1477 /* Some variables are manipulated by the macros above */
1478 SRE_CODE op;
1479 SRE_CODE arg;
1480 SRE_CODE offset;
1481 int i;
1482
1483 while (code < end) {
1484 GET_OP;
1485 switch (op) {
1486
1487 case SRE_OP_NEGATE:
1488 break;
1489
1490 case SRE_OP_LITERAL:
1491 GET_ARG;
1492 break;
1493
1494 case SRE_OP_RANGE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001495 case SRE_OP_RANGE_UNI_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001496 GET_ARG;
1497 GET_ARG;
1498 break;
1499
1500 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001501 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001502 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001503 FAIL;
1504 code += offset;
1505 break;
1506
1507 case SRE_OP_BIGCHARSET:
1508 GET_ARG; /* Number of blocks */
1509 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001510 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001511 FAIL;
1512 /* Make sure that each byte points to a valid block */
1513 for (i = 0; i < 256; i++) {
1514 if (((unsigned char *)code)[i] >= arg)
1515 FAIL;
1516 }
1517 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001518 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001519 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001520 FAIL;
1521 code += offset;
1522 break;
1523
1524 case SRE_OP_CATEGORY:
1525 GET_ARG;
1526 switch (arg) {
1527 case SRE_CATEGORY_DIGIT:
1528 case SRE_CATEGORY_NOT_DIGIT:
1529 case SRE_CATEGORY_SPACE:
1530 case SRE_CATEGORY_NOT_SPACE:
1531 case SRE_CATEGORY_WORD:
1532 case SRE_CATEGORY_NOT_WORD:
1533 case SRE_CATEGORY_LINEBREAK:
1534 case SRE_CATEGORY_NOT_LINEBREAK:
1535 case SRE_CATEGORY_LOC_WORD:
1536 case SRE_CATEGORY_LOC_NOT_WORD:
1537 case SRE_CATEGORY_UNI_DIGIT:
1538 case SRE_CATEGORY_UNI_NOT_DIGIT:
1539 case SRE_CATEGORY_UNI_SPACE:
1540 case SRE_CATEGORY_UNI_NOT_SPACE:
1541 case SRE_CATEGORY_UNI_WORD:
1542 case SRE_CATEGORY_UNI_NOT_WORD:
1543 case SRE_CATEGORY_UNI_LINEBREAK:
1544 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1545 break;
1546 default:
1547 FAIL;
1548 }
1549 break;
1550
1551 default:
1552 FAIL;
1553
1554 }
1555 }
1556
1557 return 1;
1558}
1559
1560static int
1561_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1562{
1563 /* Some variables are manipulated by the macros above */
1564 SRE_CODE op;
1565 SRE_CODE arg;
1566 SRE_CODE skip;
1567
1568 VTRACE(("code=%p, end=%p\n", code, end));
1569
1570 if (code > end)
1571 FAIL;
1572
1573 while (code < end) {
1574 GET_OP;
1575 switch (op) {
1576
1577 case SRE_OP_MARK:
1578 /* We don't check whether marks are properly nested; the
1579 sre_match() code is robust even if they don't, and the worst
1580 you can get is nonsensical match results. */
1581 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001582 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001583 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1584 FAIL;
1585 }
1586 break;
1587
1588 case SRE_OP_LITERAL:
1589 case SRE_OP_NOT_LITERAL:
1590 case SRE_OP_LITERAL_IGNORE:
1591 case SRE_OP_NOT_LITERAL_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001592 case SRE_OP_LITERAL_UNI_IGNORE:
1593 case SRE_OP_NOT_LITERAL_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001594 case SRE_OP_LITERAL_LOC_IGNORE:
1595 case SRE_OP_NOT_LITERAL_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001596 GET_ARG;
1597 /* The arg is just a character, nothing to check */
1598 break;
1599
1600 case SRE_OP_SUCCESS:
1601 case SRE_OP_FAILURE:
1602 /* Nothing to check; these normally end the matching process */
1603 break;
1604
1605 case SRE_OP_AT:
1606 GET_ARG;
1607 switch (arg) {
1608 case SRE_AT_BEGINNING:
1609 case SRE_AT_BEGINNING_STRING:
1610 case SRE_AT_BEGINNING_LINE:
1611 case SRE_AT_END:
1612 case SRE_AT_END_LINE:
1613 case SRE_AT_END_STRING:
1614 case SRE_AT_BOUNDARY:
1615 case SRE_AT_NON_BOUNDARY:
1616 case SRE_AT_LOC_BOUNDARY:
1617 case SRE_AT_LOC_NON_BOUNDARY:
1618 case SRE_AT_UNI_BOUNDARY:
1619 case SRE_AT_UNI_NON_BOUNDARY:
1620 break;
1621 default:
1622 FAIL;
1623 }
1624 break;
1625
1626 case SRE_OP_ANY:
1627 case SRE_OP_ANY_ALL:
1628 /* These have no operands */
1629 break;
1630
1631 case SRE_OP_IN:
1632 case SRE_OP_IN_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001633 case SRE_OP_IN_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001634 case SRE_OP_IN_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001635 GET_SKIP;
1636 /* Stop 1 before the end; we check the FAILURE below */
1637 if (!_validate_charset(code, code+skip-2))
1638 FAIL;
1639 if (code[skip-2] != SRE_OP_FAILURE)
1640 FAIL;
1641 code += skip-1;
1642 break;
1643
1644 case SRE_OP_INFO:
1645 {
1646 /* A minimal info field is
1647 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1648 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1649 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001650 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001651 SRE_CODE *newcode;
1652 GET_SKIP;
1653 newcode = code+skip-1;
1654 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001655 GET_ARG;
1656 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001657 /* Check that only valid flags are present */
1658 if ((flags & ~(SRE_INFO_PREFIX |
1659 SRE_INFO_LITERAL |
1660 SRE_INFO_CHARSET)) != 0)
1661 FAIL;
1662 /* PREFIX and CHARSET are mutually exclusive */
1663 if ((flags & SRE_INFO_PREFIX) &&
1664 (flags & SRE_INFO_CHARSET))
1665 FAIL;
1666 /* LITERAL implies PREFIX */
1667 if ((flags & SRE_INFO_LITERAL) &&
1668 !(flags & SRE_INFO_PREFIX))
1669 FAIL;
1670 /* Validate the prefix */
1671 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001672 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001673 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001674 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001675 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001676 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001677 FAIL;
1678 code += prefix_len;
1679 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001680 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001681 FAIL;
1682 /* Each overlap value should be < prefix_len */
1683 for (i = 0; i < prefix_len; i++) {
1684 if (code[i] >= prefix_len)
1685 FAIL;
1686 }
1687 code += prefix_len;
1688 }
1689 /* Validate the charset */
1690 if (flags & SRE_INFO_CHARSET) {
1691 if (!_validate_charset(code, newcode-1))
1692 FAIL;
1693 if (newcode[-1] != SRE_OP_FAILURE)
1694 FAIL;
1695 code = newcode;
1696 }
1697 else if (code != newcode) {
1698 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1699 FAIL;
1700 }
1701 }
1702 break;
1703
1704 case SRE_OP_BRANCH:
1705 {
1706 SRE_CODE *target = NULL;
1707 for (;;) {
1708 GET_SKIP;
1709 if (skip == 0)
1710 break;
1711 /* Stop 2 before the end; we check the JUMP below */
1712 if (!_validate_inner(code, code+skip-3, groups))
1713 FAIL;
1714 code += skip-3;
1715 /* Check that it ends with a JUMP, and that each JUMP
1716 has the same target */
1717 GET_OP;
1718 if (op != SRE_OP_JUMP)
1719 FAIL;
1720 GET_SKIP;
1721 if (target == NULL)
1722 target = code+skip-1;
1723 else if (code+skip-1 != target)
1724 FAIL;
1725 }
1726 }
1727 break;
1728
1729 case SRE_OP_REPEAT_ONE:
1730 case SRE_OP_MIN_REPEAT_ONE:
1731 {
1732 SRE_CODE min, max;
1733 GET_SKIP;
1734 GET_ARG; min = arg;
1735 GET_ARG; max = arg;
1736 if (min > max)
1737 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001738 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001739 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001740 if (!_validate_inner(code, code+skip-4, groups))
1741 FAIL;
1742 code += skip-4;
1743 GET_OP;
1744 if (op != SRE_OP_SUCCESS)
1745 FAIL;
1746 }
1747 break;
1748
1749 case SRE_OP_REPEAT:
1750 {
1751 SRE_CODE min, max;
1752 GET_SKIP;
1753 GET_ARG; min = arg;
1754 GET_ARG; max = arg;
1755 if (min > max)
1756 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001757 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001758 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001759 if (!_validate_inner(code, code+skip-3, groups))
1760 FAIL;
1761 code += skip-3;
1762 GET_OP;
1763 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1764 FAIL;
1765 }
1766 break;
1767
1768 case SRE_OP_GROUPREF:
1769 case SRE_OP_GROUPREF_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001770 case SRE_OP_GROUPREF_UNI_IGNORE:
1771 case SRE_OP_GROUPREF_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001772 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001773 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001774 FAIL;
1775 break;
1776
1777 case SRE_OP_GROUPREF_EXISTS:
1778 /* The regex syntax for this is: '(?(group)then|else)', where
1779 'group' is either an integer group number or a group name,
1780 'then' and 'else' are sub-regexes, and 'else' is optional. */
1781 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001782 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001783 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001784 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001785 code--; /* The skip is relative to the first arg! */
1786 /* There are two possibilities here: if there is both a 'then'
1787 part and an 'else' part, the generated code looks like:
1788
1789 GROUPREF_EXISTS
1790 <group>
1791 <skipyes>
1792 ...then part...
1793 JUMP
1794 <skipno>
1795 (<skipyes> jumps here)
1796 ...else part...
1797 (<skipno> jumps here)
1798
1799 If there is only a 'then' part, it looks like:
1800
1801 GROUPREF_EXISTS
1802 <group>
1803 <skip>
1804 ...then part...
1805 (<skip> jumps here)
1806
1807 There is no direct way to decide which it is, and we don't want
1808 to allow arbitrary jumps anywhere in the code; so we just look
1809 for a JUMP opcode preceding our skip target.
1810 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001811 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001812 code[skip-3] == SRE_OP_JUMP)
1813 {
1814 VTRACE(("both then and else parts present\n"));
1815 if (!_validate_inner(code+1, code+skip-3, groups))
1816 FAIL;
1817 code += skip-2; /* Position after JUMP, at <skipno> */
1818 GET_SKIP;
1819 if (!_validate_inner(code, code+skip-1, groups))
1820 FAIL;
1821 code += skip-1;
1822 }
1823 else {
1824 VTRACE(("only a then part present\n"));
1825 if (!_validate_inner(code+1, code+skip-1, groups))
1826 FAIL;
1827 code += skip-1;
1828 }
1829 break;
1830
1831 case SRE_OP_ASSERT:
1832 case SRE_OP_ASSERT_NOT:
1833 GET_SKIP;
1834 GET_ARG; /* 0 for lookahead, width for lookbehind */
1835 code--; /* Back up over arg to simplify math below */
1836 if (arg & 0x80000000)
1837 FAIL; /* Width too large */
1838 /* Stop 1 before the end; we check the SUCCESS below */
1839 if (!_validate_inner(code+1, code+skip-2, groups))
1840 FAIL;
1841 code += skip-2;
1842 GET_OP;
1843 if (op != SRE_OP_SUCCESS)
1844 FAIL;
1845 break;
1846
1847 default:
1848 FAIL;
1849
1850 }
1851 }
1852
1853 VTRACE(("okay\n"));
1854 return 1;
1855}
1856
1857static int
1858_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1859{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001860 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1861 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001862 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001863 return _validate_inner(code, end-1, groups);
1864}
1865
1866static int
1867_validate(PatternObject *self)
1868{
1869 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1870 {
1871 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1872 return 0;
1873 }
1874 else
1875 VTRACE(("Success!\n"));
1876 return 1;
1877}
1878
1879/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001880/* match methods */
1881
1882static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001883match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001884{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001885 Py_XDECREF(self->regs);
1886 Py_XDECREF(self->string);
1887 Py_DECREF(self->pattern);
1888 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001889}
1890
1891static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001892match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001893{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001894 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001895 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001896 Py_buffer view;
1897 PyObject *result;
1898 void* ptr;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001899 Py_ssize_t i, j;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001900
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001901 assert(0 <= index && index < self->groups);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001902 index *= 2;
1903
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001904 if (self->string == Py_None || self->mark[index] < 0) {
1905 /* return default value if the string or group is undefined */
1906 Py_INCREF(def);
1907 return def;
1908 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001909
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001910 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001911 if (ptr == NULL)
1912 return NULL;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001913
1914 i = self->mark[index];
1915 j = self->mark[index+1];
1916 i = Py_MIN(i, length);
1917 j = Py_MIN(j, length);
1918 result = getslice(isbytes, ptr, self->string, i, j);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001919 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001920 PyBuffer_Release(&view);
1921 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001922}
1923
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001924static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001925match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001926{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001927 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001928
Guido van Rossumddefaf32007-01-14 03:31:43 +00001929 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001930 /* Default value */
1931 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001932
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001933 if (PyIndex_Check(index)) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001934 i = PyNumber_AsSsize_t(index, NULL);
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001935 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001936 else {
1937 i = -1;
Guido van Rossumb700df92000-03-31 14:59:30 +00001938
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001939 if (self->pattern->groupindex) {
1940 index = PyDict_GetItemWithError(self->pattern->groupindex, index);
1941 if (index && PyLong_Check(index)) {
1942 i = PyLong_AsSsize_t(index);
1943 }
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001944 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001945 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001946 if (i < 0 || i >= self->groups) {
1947 /* raise IndexError if we were given a bad group number */
1948 if (!PyErr_Occurred()) {
1949 PyErr_SetString(PyExc_IndexError, "no such group");
1950 }
1951 return -1;
1952 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001953
1954 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001955}
1956
1957static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001958match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001959{
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001960 Py_ssize_t i = match_getindex(self, index);
1961
1962 if (i < 0) {
1963 return NULL;
1964 }
1965
1966 return match_getslice_by_index(self, i, def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001967}
1968
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001969/*[clinic input]
1970_sre.SRE_Match.expand
1971
1972 template: object
1973
1974Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1975[clinic start generated code]*/
1976
1977static PyObject *
1978_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1979/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001980{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001981 /* delegate to Python code */
1982 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001983 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001984 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001985 );
1986}
1987
1988static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001989match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001990{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001991 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001992 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001993
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001994 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001995
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001996 switch (size) {
1997 case 0:
Serhiy Storchakaba85d692017-03-30 09:09:41 +03001998 result = match_getslice(self, _PyLong_Zero, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001999 break;
2000 case 1:
2001 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2002 break;
2003 default:
2004 /* fetch multiple items */
2005 result = PyTuple_New(size);
2006 if (!result)
2007 return NULL;
2008 for (i = 0; i < size; i++) {
2009 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002010 self, PyTuple_GET_ITEM(args, i), Py_None
2011 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002012 if (!item) {
2013 Py_DECREF(result);
2014 return NULL;
2015 }
2016 PyTuple_SET_ITEM(result, i, item);
2017 }
2018 break;
2019 }
2020 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002021}
2022
Eric V. Smith605bdae2016-09-11 08:55:43 -04002023static PyObject*
2024match_getitem(MatchObject* self, PyObject* name)
2025{
2026 return match_getslice(self, name, Py_None);
2027}
2028
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002029/*[clinic input]
2030_sre.SRE_Match.groups
2031
2032 default: object = None
2033 Is used for groups that did not participate in the match.
2034
2035Return a tuple containing all the subgroups of the match, from 1.
2036[clinic start generated code]*/
2037
2038static PyObject *
2039_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2040/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002041{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002042 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002043 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002044
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002045 result = PyTuple_New(self->groups-1);
2046 if (!result)
2047 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002048
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002049 for (index = 1; index < self->groups; index++) {
2050 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002051 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002052 if (!item) {
2053 Py_DECREF(result);
2054 return NULL;
2055 }
2056 PyTuple_SET_ITEM(result, index-1, item);
2057 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002058
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002059 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002060}
2061
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002062/*[clinic input]
2063_sre.SRE_Match.groupdict
2064
2065 default: object = None
2066 Is used for groups that did not participate in the match.
2067
2068Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2069[clinic start generated code]*/
2070
2071static PyObject *
2072_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2073/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002074{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002075 PyObject *result;
2076 PyObject *key;
2077 PyObject *value;
2078 Py_ssize_t pos = 0;
2079 Py_hash_t hash;
Guido van Rossumb700df92000-03-31 14:59:30 +00002080
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002081 result = PyDict_New();
2082 if (!result || !self->pattern->groupindex)
2083 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002084
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002085 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002086 int status;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002087 Py_INCREF(key);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002088 value = match_getslice(self, key, default_value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002089 if (!value) {
2090 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002091 goto failed;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002092 }
2093 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002094 Py_DECREF(value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002095 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002096 if (status < 0)
2097 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002098 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002099
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002100 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002101
2102failed:
Fredrik Lundh770617b2001-01-14 15:06:11 +00002103 Py_DECREF(result);
2104 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002105}
2106
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002107/*[clinic input]
2108_sre.SRE_Match.start -> Py_ssize_t
2109
2110 group: object(c_default="NULL") = 0
2111 /
2112
2113Return index of the start of the substring matched by group.
2114[clinic start generated code]*/
2115
2116static Py_ssize_t
2117_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2118/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002119{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002120 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002121
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02002122 if (index < 0) {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002123 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002124 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002125
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002126 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002127 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002128}
2129
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002130/*[clinic input]
2131_sre.SRE_Match.end -> Py_ssize_t
2132
2133 group: object(c_default="NULL") = 0
2134 /
2135
2136Return index of the end of the substring matched by group.
2137[clinic start generated code]*/
2138
2139static Py_ssize_t
2140_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2141/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002142{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002143 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002144
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02002145 if (index < 0) {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002146 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002147 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002148
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002149 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002150 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002151}
2152
2153LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002154_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002155{
2156 PyObject* pair;
2157 PyObject* item;
2158
2159 pair = PyTuple_New(2);
2160 if (!pair)
2161 return NULL;
2162
Christian Heimes217cfd12007-12-02 14:31:20 +00002163 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002164 if (!item)
2165 goto error;
2166 PyTuple_SET_ITEM(pair, 0, item);
2167
Christian Heimes217cfd12007-12-02 14:31:20 +00002168 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002169 if (!item)
2170 goto error;
2171 PyTuple_SET_ITEM(pair, 1, item);
2172
2173 return pair;
2174
2175 error:
2176 Py_DECREF(pair);
2177 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002178}
2179
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002180/*[clinic input]
2181_sre.SRE_Match.span
2182
2183 group: object(c_default="NULL") = 0
2184 /
2185
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002186For match object m, return the 2-tuple (m.start(group), m.end(group)).
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002187[clinic start generated code]*/
2188
2189static PyObject *
2190_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002191/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002192{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002193 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002194
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02002195 if (index < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002196 return NULL;
2197 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002198
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002199 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002200 return _pair(self->mark[index*2], self->mark[index*2+1]);
2201}
2202
2203static PyObject*
2204match_regs(MatchObject* self)
2205{
2206 PyObject* regs;
2207 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002208 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002209
2210 regs = PyTuple_New(self->groups);
2211 if (!regs)
2212 return NULL;
2213
2214 for (index = 0; index < self->groups; index++) {
2215 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2216 if (!item) {
2217 Py_DECREF(regs);
2218 return NULL;
2219 }
2220 PyTuple_SET_ITEM(regs, index, item);
2221 }
2222
2223 Py_INCREF(regs);
2224 self->regs = regs;
2225
2226 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002227}
2228
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002229/*[clinic input]
2230_sre.SRE_Match.__copy__
2231
2232[clinic start generated code]*/
2233
2234static PyObject *
2235_sre_SRE_Match___copy___impl(MatchObject *self)
2236/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002237{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002238 Py_INCREF(self);
2239 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002240}
2241
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002242/*[clinic input]
2243_sre.SRE_Match.__deepcopy__
2244
2245 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002246 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002247
2248[clinic start generated code]*/
2249
2250static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002251_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2252/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002253{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002254 Py_INCREF(self);
2255 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002256}
2257
/* Docstring for the match type; used as tp_doc of Match_Type. */
PyDoc_STRVAR(match_doc,
"The result of re.match() and re.search().\n\
Match objects always have a boolean value of True.");
2261
/* Docstring attached to the "group" entry of match_methods.
   NOTE(review): the text uses backslash line continuation, so any leading
   whitespace on the continuation lines is part of the string itself --
   do not re-indent those lines. */
PyDoc_STRVAR(match_group_doc,
"group([group1, ...]) -> str or tuple.\n\
 Return subgroup(s) of the match by indices or names.\n\
 For 0 returns the entire match.");
2266
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002267static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02002268match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
Guido van Rossumb700df92000-03-31 14:59:30 +00002269{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002270 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002271 return PyLong_FromSsize_t(self->lastindex);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002272 Py_RETURN_NONE;
Guido van Rossumb700df92000-03-31 14:59:30 +00002273}
2274
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002275static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02002276match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002277{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002278 if (self->pattern->indexgroup &&
2279 self->lastindex >= 0 &&
2280 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2281 {
2282 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2283 self->lastindex);
2284 Py_INCREF(result);
2285 return result;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002286 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002287 Py_RETURN_NONE;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002288}
2289
2290static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02002291match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002292{
2293 if (self->regs) {
2294 Py_INCREF(self->regs);
2295 return self->regs;
2296 } else
2297 return match_regs(self);
2298}
2299
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002300static PyObject *
2301match_repr(MatchObject *self)
2302{
2303 PyObject *result;
2304 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2305 if (group0 == NULL)
2306 return NULL;
2307 result = PyUnicode_FromFormat(
sth8b91eda2019-03-10 11:29:14 +01002308 "<%s object; span=(%zd, %zd), match=%.50R>",
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002309 Py_TYPE(self)->tp_name,
2310 self->mark[0], self->mark[1], group0);
2311 Py_DECREF(group0);
2312 return result;
2313}
2314
2315
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002316static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002317pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002318{
2319 /* create match object (from state object) */
2320
2321 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002322 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002323 char* base;
2324 int n;
2325
2326 if (status > 0) {
2327
2328 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002329 /* coverity[ampersand_in_size] */
Victor Stinner92055202020-04-08 00:38:15 +02002330 match = PyObject_NewVar(MatchObject, &Match_Type,
2331 2*(pattern->groups+1));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002332 if (!match)
2333 return NULL;
2334
2335 Py_INCREF(pattern);
2336 match->pattern = pattern;
2337
2338 Py_INCREF(state->string);
2339 match->string = state->string;
2340
2341 match->regs = NULL;
2342 match->groups = pattern->groups+1;
2343
2344 /* fill in group slices */
2345
2346 base = (char*) state->beginning;
2347 n = state->charsize;
2348
2349 match->mark[0] = ((char*) state->start - base) / n;
2350 match->mark[1] = ((char*) state->ptr - base) / n;
2351
2352 for (i = j = 0; i < pattern->groups; i++, j+=2)
2353 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2354 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2355 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2356 } else
2357 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2358
2359 match->pos = state->pos;
2360 match->endpos = state->endpos;
2361
2362 match->lastindex = state->lastindex;
2363
2364 return (PyObject*) match;
2365
2366 } else if (status == 0) {
2367
2368 /* no match */
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002369 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002370
2371 }
2372
2373 /* internal error */
2374 pattern_error(status);
2375 return NULL;
2376}
2377
2378
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002379/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002380/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002381
2382static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002383scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002384{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002385 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002386 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002387 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002388}
2389
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002390/*[clinic input]
2391_sre.SRE_Scanner.match
2392
2393[clinic start generated code]*/
2394
2395static PyObject *
2396_sre_SRE_Scanner_match_impl(ScannerObject *self)
2397/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002398{
2399 SRE_STATE* state = &self->state;
2400 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002401 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002402
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002403 if (state->start == NULL)
2404 Py_RETURN_NONE;
2405
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002406 state_reset(state);
2407
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002408 state->ptr = state->start;
2409
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002410 status = sre_match(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002411 if (PyErr_Occurred())
2412 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002413
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002414 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002415 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002416
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002417 if (status == 0)
2418 state->start = NULL;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002419 else {
2420 state->must_advance = (state->ptr == state->start);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002421 state->start = state->ptr;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002422 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002423
2424 return match;
2425}
2426
2427
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002428/*[clinic input]
2429_sre.SRE_Scanner.search
2430
2431[clinic start generated code]*/
2432
2433static PyObject *
2434_sre_SRE_Scanner_search_impl(ScannerObject *self)
2435/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002436{
2437 SRE_STATE* state = &self->state;
2438 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002439 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002440
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002441 if (state->start == NULL)
2442 Py_RETURN_NONE;
2443
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002444 state_reset(state);
2445
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002446 state->ptr = state->start;
2447
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002448 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002449 if (PyErr_Occurred())
2450 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002451
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002452 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002453 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002454
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002455 if (status == 0)
2456 state->start = NULL;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002457 else {
2458 state->must_advance = (state->ptr == state->start);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002459 state->start = state->ptr;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002460 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002461
2462 return match;
2463}
2464
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002465static PyObject *
2466pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002467{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002468 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002469
2470 /* create scanner object */
Victor Stinner92055202020-04-08 00:38:15 +02002471 scanner = PyObject_New(ScannerObject, &Scanner_Type);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002472 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002473 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002474 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002475
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002476 /* create search state object */
2477 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2478 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002479 return NULL;
2480 }
2481
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002482 Py_INCREF(self);
2483 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002484
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002485 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002486}
2487
Victor Stinnerb44fb122016-11-21 16:35:08 +01002488static Py_hash_t
2489pattern_hash(PatternObject *self)
2490{
2491 Py_hash_t hash, hash2;
2492
2493 hash = PyObject_Hash(self->pattern);
2494 if (hash == -1) {
2495 return -1;
2496 }
2497
2498 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2499 hash ^= hash2;
2500
2501 hash ^= self->flags;
2502 hash ^= self->isbytes;
2503 hash ^= self->codesize;
2504
2505 if (hash == -1) {
2506 hash = -2;
2507 }
2508 return hash;
2509}
2510
2511static PyObject*
2512pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2513{
2514 PatternObject *left, *right;
2515 int cmp;
2516
2517 if (op != Py_EQ && op != Py_NE) {
2518 Py_RETURN_NOTIMPLEMENTED;
2519 }
2520
Dong-hee Na1b55b652020-02-17 19:09:15 +09002521 if (!Py_IS_TYPE(lefto, &Pattern_Type) || !Py_IS_TYPE(righto, &Pattern_Type)) {
Victor Stinnerb44fb122016-11-21 16:35:08 +01002522 Py_RETURN_NOTIMPLEMENTED;
2523 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002524
2525 if (lefto == righto) {
2526 /* a pattern is equal to itself */
2527 return PyBool_FromLong(op == Py_EQ);
2528 }
2529
Victor Stinnerb44fb122016-11-21 16:35:08 +01002530 left = (PatternObject *)lefto;
2531 right = (PatternObject *)righto;
2532
2533 cmp = (left->flags == right->flags
2534 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002535 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002536 if (cmp) {
2537 /* Compare the code and the pattern because the same pattern can
2538 produce different codes depending on the locale used to compile the
2539 pattern when the re.LOCALE flag is used. Don't compare groups,
2540 indexgroup nor groupindex: they are derivated from the pattern. */
2541 cmp = (memcmp(left->code, right->code,
2542 sizeof(left->code[0]) * left->codesize) == 0);
2543 }
2544 if (cmp) {
2545 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2546 Py_EQ);
2547 if (cmp < 0) {
2548 return NULL;
2549 }
2550 }
2551 if (op == Py_NE) {
2552 cmp = !cmp;
2553 }
2554 return PyBool_FromLong(cmp);
2555}
2556
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002557#include "clinic/_sre.c.h"
2558
/* Method table for pattern objects (tp_methods of Pattern_Type).
   The *_METHODDEF entries are macros that each expand to a complete
   table entry -- presumably supplied by clinic/_sre.c.h, which is
   included just above; confirm against that header. */
static PyMethodDef pattern_methods[] = {
    _SRE_SRE_PATTERN_MATCH_METHODDEF
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
    _SRE_SRE_PATTERN_SUB_METHODDEF
    _SRE_SRE_PATTERN_SUBN_METHODDEF
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
    _SRE_SRE_PATTERN___COPY___METHODDEF
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
    /* class method taking one argument, forwarded to Py_GenericAlias */
    {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS,
    PyDoc_STR("See PEP 585")},
    {NULL, NULL}  /* Sentinel */
};
2575
/* Computed attributes of pattern objects (tp_getset of Pattern_Type).
   "groupindex" has a getter only, so it cannot be assigned to. */
static PyGetSetDef pattern_getset[] = {
    {"groupindex", (getter)pattern_groupindex, (setter)NULL,
    "A dictionary mapping group names to group numbers."},
    {NULL} /* Sentinel */
};
2581
/* Byte offset of field x inside PatternObject. */
#define PAT_OFF(x) offsetof(PatternObject, x)
/* Read-only attributes of pattern objects that are served straight from
   PatternObject fields (tp_members of Pattern_Type). */
static PyMemberDef pattern_members[] = {
    {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
    "The pattern string from which the RE object was compiled."},
    {"flags", T_INT, PAT_OFF(flags), READONLY,
    "The regex matching flags."},
    {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
    "The number of capturing groups in the pattern."},
    {NULL} /* Sentinel */
};
2592
2593static PyTypeObject Pattern_Type = {
2594 PyVarObject_HEAD_INIT(NULL, 0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002595 "re.Pattern",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002596 sizeof(PatternObject), sizeof(SRE_CODE),
2597 (destructor)pattern_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002598 0, /* tp_vectorcall_offset */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002599 0, /* tp_getattr */
2600 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002601 0, /* tp_as_async */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002602 (reprfunc)pattern_repr, /* tp_repr */
2603 0, /* tp_as_number */
2604 0, /* tp_as_sequence */
2605 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002606 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002607 0, /* tp_call */
2608 0, /* tp_str */
2609 0, /* tp_getattro */
2610 0, /* tp_setattro */
2611 0, /* tp_as_buffer */
2612 Py_TPFLAGS_DEFAULT, /* tp_flags */
2613 pattern_doc, /* tp_doc */
2614 0, /* tp_traverse */
2615 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002616 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002617 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2618 0, /* tp_iter */
2619 0, /* tp_iternext */
2620 pattern_methods, /* tp_methods */
2621 pattern_members, /* tp_members */
2622 pattern_getset, /* tp_getset */
2623};
2624
Eric V. Smith605bdae2016-09-11 08:55:43 -04002625/* Match objects do not support length or assignment, but do support
2626 __getitem__. */
2627static PyMappingMethods match_as_mapping = {
2628 NULL,
2629 (binaryfunc)match_getitem,
2630 NULL
2631};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002632
/* Method table for match objects (tp_methods of Match_Type).
   "group" is declared by hand with METH_VARARGS; the *_METHODDEF entries
   are macros that each expand to a complete table entry -- presumably
   supplied by clinic/_sre.c.h; confirm against that header. */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
    _SRE_SRE_MATCH_START_METHODDEF
    _SRE_SRE_MATCH_END_METHODDEF
    _SRE_SRE_MATCH_SPAN_METHODDEF
    _SRE_SRE_MATCH_GROUPS_METHODDEF
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
    _SRE_SRE_MATCH_EXPAND_METHODDEF
    _SRE_SRE_MATCH___COPY___METHODDEF
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
    /* class method taking one argument, forwarded to Py_GenericAlias */
    {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS,
    PyDoc_STR("See PEP 585")},
    {NULL, NULL}  /* Sentinel */
};
2647
/* Computed attributes of match objects (tp_getset of Match_Type).
   All three have a getter only, so none of them can be assigned to. */
static PyGetSetDef match_getset[] = {
    {"lastindex", (getter)match_lastindex_get, (setter)NULL,
    "The integer index of the last matched capturing group."},
    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
    "The name of the last matched capturing group."},
    /* "regs" is registered without a documentation string */
    {"regs", (getter)match_regs_get, (setter)NULL},
    {NULL}  /* Sentinel */
};
2656
/* Byte offset of field x inside MatchObject. */
#define MATCH_OFF(x) offsetof(MatchObject, x)
/* Read-only attributes of match objects that are served straight from
   MatchObject fields (tp_members of Match_Type).  Note that the Python
   attribute "re" is backed by the C field "pattern". */
static PyMemberDef match_members[] = {
    {"string", T_OBJECT, MATCH_OFF(string), READONLY,
    "The string passed to match() or search()."},
    {"re", T_OBJECT, MATCH_OFF(pattern), READONLY,
    "The regular expression object."},
    {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
    "The index into the string at which the RE engine started looking for a match."},
    {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
    "The index into the string beyond which the RE engine will not go."},
    {NULL}  /* Sentinel */
};
2669
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2672
2673static PyTypeObject Match_Type = {
2674 PyVarObject_HEAD_INIT(NULL,0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002675 "re.Match",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002676 sizeof(MatchObject), sizeof(Py_ssize_t),
2677 (destructor)match_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002678 0, /* tp_vectorcall_offset */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002679 0, /* tp_getattr */
2680 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002681 0, /* tp_as_async */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002682 (reprfunc)match_repr, /* tp_repr */
2683 0, /* tp_as_number */
2684 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002685 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002686 0, /* tp_hash */
2687 0, /* tp_call */
2688 0, /* tp_str */
2689 0, /* tp_getattro */
2690 0, /* tp_setattro */
2691 0, /* tp_as_buffer */
2692 Py_TPFLAGS_DEFAULT, /* tp_flags */
2693 match_doc, /* tp_doc */
2694 0, /* tp_traverse */
2695 0, /* tp_clear */
2696 0, /* tp_richcompare */
2697 0, /* tp_weaklistoffset */
2698 0, /* tp_iter */
2699 0, /* tp_iternext */
2700 match_methods, /* tp_methods */
2701 match_members, /* tp_members */
2702 match_getset, /* tp_getset */
2703};
2704
/* Method table for scanner objects (tp_methods of Scanner_Type).
   The *_METHODDEF entries are macros that each expand to a complete
   table entry -- presumably supplied by clinic/_sre.c.h; confirm against
   that header. */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2710
/* Byte offset of field x inside ScannerObject. */
#define SCAN_OFF(x) offsetof(ScannerObject, x)
/* Read-only attributes of scanner objects (tp_members of Scanner_Type);
   "pattern" is registered without a documentation string. */
static PyMemberDef scanner_members[] = {
    {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
    {NULL} /* Sentinel */
};
2716
2717static PyTypeObject Scanner_Type = {
2718 PyVarObject_HEAD_INIT(NULL, 0)
2719 "_" SRE_MODULE ".SRE_Scanner",
2720 sizeof(ScannerObject), 0,
2721 (destructor)scanner_dealloc,/* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002722 0, /* tp_vectorcall_offset */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002723 0, /* tp_getattr */
2724 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002725 0, /* tp_as_async */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002726 0, /* tp_repr */
2727 0, /* tp_as_number */
2728 0, /* tp_as_sequence */
2729 0, /* tp_as_mapping */
2730 0, /* tp_hash */
2731 0, /* tp_call */
2732 0, /* tp_str */
2733 0, /* tp_getattro */
2734 0, /* tp_setattro */
2735 0, /* tp_as_buffer */
2736 Py_TPFLAGS_DEFAULT, /* tp_flags */
2737 0, /* tp_doc */
2738 0, /* tp_traverse */
2739 0, /* tp_clear */
2740 0, /* tp_richcompare */
2741 0, /* tp_weaklistoffset */
2742 0, /* tp_iter */
2743 0, /* tp_iternext */
2744 scanner_methods, /* tp_methods */
2745 scanner_members, /* tp_members */
2746 0, /* tp_getset */
2747};
2748
/* Module-level function table, installed through sremodule.
   The *_METHODDEF entries are macros that each expand to a complete
   table entry -- presumably supplied by clinic/_sre.c.h; confirm against
   that header. */
static PyMethodDef _functions[] = {
    _SRE_COMPILE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF
    _SRE_ASCII_ISCASED_METHODDEF
    _SRE_UNICODE_ISCASED_METHODDEF
    _SRE_ASCII_TOLOWER_METHODDEF
    _SRE_UNICODE_TOLOWER_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2758
Martin v. Löwis1a214512008-06-11 05:26:20 +00002759static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002760 PyModuleDef_HEAD_INIT,
2761 "_" SRE_MODULE,
2762 NULL,
2763 -1,
2764 _functions,
2765 NULL,
2766 NULL,
2767 NULL,
2768 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002769};
2770
2771PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002772{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002773 PyObject* m;
2774 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002775 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002776
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002777 /* Patch object types */
2778 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2779 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002780 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002781
Martin v. Löwis1a214512008-06-11 05:26:20 +00002782 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002783 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002784 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002785 d = PyModule_GetDict(m);
2786
Christian Heimes217cfd12007-12-02 14:31:20 +00002787 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002788 if (x) {
2789 PyDict_SetItemString(d, "MAGIC", x);
2790 Py_DECREF(x);
2791 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002792
Christian Heimes217cfd12007-12-02 14:31:20 +00002793 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002794 if (x) {
2795 PyDict_SetItemString(d, "CODESIZE", x);
2796 Py_DECREF(x);
2797 }
2798
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002799 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2800 if (x) {
2801 PyDict_SetItemString(d, "MAXREPEAT", x);
2802 Py_DECREF(x);
2803 }
2804
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002805 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2806 if (x) {
2807 PyDict_SetItemString(d, "MAXGROUPS", x);
2808 Py_DECREF(x);
2809 }
2810
Neal Norwitzfe537132007-08-26 03:55:15 +00002811 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002812 if (x) {
2813 PyDict_SetItemString(d, "copyright", x);
2814 Py_DECREF(x);
2815 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002816 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002817}
2818
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002819/* vim:ts=4:sw=4:et
2820*/