blob: 75f030cfaa8ff3fb571aced3b8d913f31688b9de [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
/* Copyright banner kept as a file-local constant string. */
static const char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
/* Name of this module, minus the leading underscore.  A definition
 * supplied before this point (e.g. by the build) takes precedence. */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

#define SRE_PY_MODULE "re"

/* Defining VERBOSE enables tracing; it is forced off here. */
#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063
/* LOCAL(type) declares a file-private inline function returning `type`. */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif
72
/* Negative status codes reported by the matching engine. */
#define SRE_ERROR_ILLEGAL         -1  /* illegal opcode */
#define SRE_ERROR_STATE           -2  /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3  /* runaway recursion */
#define SRE_ERROR_MEMORY          -9  /* out of memory */
#define SRE_ERROR_INTERRUPTED     -10 /* signal handler raised exception */

/* TRACE((fmt, ...)) takes a parenthesised printf argument list; it
 * prints only when VERBOSE is defined and expands to nothing otherwise. */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
85
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000086/* -------------------------------------------------------------------- */
87/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000088
/* ASCII character predicates.  Each one first rejects code points above
 * the highest character the class can contain, then consults the
 * Py_IS* classifier (or compares directly). */
#define SRE_IS_DIGIT(ch) ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_WORD(ch) ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +000097
/* Lowercase `ch` using ASCII rules only; code points >= 128 are
 * returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
102
/* locale-specific character predicates */
/* ((c & ~N) == 0) is the same as (c < N+1) for any unsigned c; testing
 * it this way avoids warnings when c's type cannot hold values >= N+1.
 * Code points outside 0..255 are never alphanumeric here. */
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) ((ch) == '_' || SRE_LOC_IS_ALNUM((ch)))
108
/* Lowercase `ch` with the C library's tolower(); code points >= 256
 * are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower(ch);
}
113
/* Uppercase `ch` with the C library's toupper(); code points >= 256
 * are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper(ch);
}
118
/* unicode-specific character predicates: thin aliases for the
 * Py_UNICODE_* classifiers, plus '_' for the word class */

#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000126
/* Lowercase `ch` via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
131
/* Uppercase `ch` via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
136
Guido van Rossumb700df92000-03-31 14:59:30 +0000137LOCAL(int)
138sre_category(SRE_CODE category, unsigned int ch)
139{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000140 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000141
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000142 case SRE_CATEGORY_DIGIT:
143 return SRE_IS_DIGIT(ch);
144 case SRE_CATEGORY_NOT_DIGIT:
145 return !SRE_IS_DIGIT(ch);
146 case SRE_CATEGORY_SPACE:
147 return SRE_IS_SPACE(ch);
148 case SRE_CATEGORY_NOT_SPACE:
149 return !SRE_IS_SPACE(ch);
150 case SRE_CATEGORY_WORD:
151 return SRE_IS_WORD(ch);
152 case SRE_CATEGORY_NOT_WORD:
153 return !SRE_IS_WORD(ch);
154 case SRE_CATEGORY_LINEBREAK:
155 return SRE_IS_LINEBREAK(ch);
156 case SRE_CATEGORY_NOT_LINEBREAK:
157 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000159 case SRE_CATEGORY_LOC_WORD:
160 return SRE_LOC_IS_WORD(ch);
161 case SRE_CATEGORY_LOC_NOT_WORD:
162 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000163
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000164 case SRE_CATEGORY_UNI_DIGIT:
165 return SRE_UNI_IS_DIGIT(ch);
166 case SRE_CATEGORY_UNI_NOT_DIGIT:
167 return !SRE_UNI_IS_DIGIT(ch);
168 case SRE_CATEGORY_UNI_SPACE:
169 return SRE_UNI_IS_SPACE(ch);
170 case SRE_CATEGORY_UNI_NOT_SPACE:
171 return !SRE_UNI_IS_SPACE(ch);
172 case SRE_CATEGORY_UNI_WORD:
173 return SRE_UNI_IS_WORD(ch);
174 case SRE_CATEGORY_UNI_NOT_WORD:
175 return !SRE_UNI_IS_WORD(ch);
176 case SRE_CATEGORY_UNI_LINEBREAK:
177 return SRE_UNI_IS_LINEBREAK(ch);
178 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
179 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000180 }
181 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000182}
183
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300184LOCAL(int)
185char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
186{
187 return ch == pattern
188 || (SRE_CODE) sre_lower_locale(ch) == pattern
189 || (SRE_CODE) sre_upper_locale(ch) == pattern;
190}
191
192
Guido van Rossumb700df92000-03-31 14:59:30 +0000193/* helpers */
194
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000195static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000196data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000197{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000198 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000199 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000200 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000201 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000202 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000203}
204
205static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000206data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000207{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000208 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000209 minsize = state->data_stack_base+size;
210 cursize = state->data_stack_size;
211 if (cursize < minsize) {
212 void* stack;
213 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300214 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000215 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000217 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000218 return SRE_ERROR_MEMORY;
219 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000220 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000221 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000222 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000223 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000224}
225
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000226/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000227
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300228#define SRE_CHAR Py_UCS1
229#define SIZEOF_SRE_CHAR 1
230#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300231#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000232
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300233/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000234
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300235#define SRE_CHAR Py_UCS2
236#define SIZEOF_SRE_CHAR 2
237#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300238#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000239
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300240/* generate 32-bit unicode version */
241
242#define SRE_CHAR Py_UCS4
243#define SIZEOF_SRE_CHAR 4
244#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300245#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000246
247/* -------------------------------------------------------------------- */
248/* factories and destructors */
249
250/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100251static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300252static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000253
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300254
255/*[clinic input]
256module _sre
257class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
258class _sre.SRE_Match "MatchObject *" "&Match_Type"
259class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
260[clinic start generated code]*/
261/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
262
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700263static PyTypeObject Pattern_Type;
264static PyTypeObject Match_Type;
265static PyTypeObject Scanner_Type;
266
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300267/*[clinic input]
268_sre.getcodesize -> int
269[clinic start generated code]*/
270
271static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300272_sre_getcodesize_impl(PyObject *module)
273/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000274{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300275 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000276}
277
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300278/*[clinic input]
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300279_sre.ascii_iscased -> bool
280
281 character: int
282 /
283
284[clinic start generated code]*/
285
286static int
287_sre_ascii_iscased_impl(PyObject *module, int character)
288/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
289{
290 unsigned int ch = (unsigned int)character;
Sergey Fedoseev7f0d59f2018-09-12 17:49:09 +0500291 return ch < 128 && Py_ISALPHA(ch);
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300292}
293
294/*[clinic input]
295_sre.unicode_iscased -> bool
296
297 character: int
298 /
299
300[clinic start generated code]*/
301
302static int
303_sre_unicode_iscased_impl(PyObject *module, int character)
304/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
305{
306 unsigned int ch = (unsigned int)character;
307 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
308}
309
310/*[clinic input]
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300311_sre.ascii_tolower -> int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300312
313 character: int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300314 /
315
316[clinic start generated code]*/
317
318static int
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300319_sre_ascii_tolower_impl(PyObject *module, int character)
320/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000321{
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300322 return sre_lower_ascii(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000323}
324
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300325/*[clinic input]
326_sre.unicode_tolower -> int
327
328 character: int
329 /
330
331[clinic start generated code]*/
332
333static int
334_sre_unicode_tolower_impl(PyObject *module, int character)
335/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
336{
337 return sre_lower_unicode(character);
338}
339
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000340LOCAL(void)
341state_reset(SRE_STATE* state)
342{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000343 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000344 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000345
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000346 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000347 state->lastindex = -1;
348
349 state->repeat = NULL;
350
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000351 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000352}
353
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000354static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200355getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300356 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600357 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000358{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000359 /* given a python object, return a data pointer, a length (in
360 characters), and a character size. return NULL if the object
361 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000362
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000363 /* Unicode objects do not support the buffer API. So, get the data
364 directly instead. */
365 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200366 if (PyUnicode_READY(string) == -1)
367 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200369 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300370 *p_isbytes = 0;
371 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000372 }
373
Victor Stinner0058b862011-09-29 03:27:47 +0200374 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300375 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200376 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300377 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000378 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000379
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300380 *p_length = view->len;
381 *p_charsize = 1;
382 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000383
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300384 if (view->buf == NULL) {
385 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
386 PyBuffer_Release(view);
387 view->buf = NULL;
388 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000389 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300390 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000391}
392
393LOCAL(PyObject*)
394state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000395 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000396{
397 /* prepare state object */
398
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000399 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300400 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000401 void* ptr;
402
403 memset(state, 0, sizeof(SRE_STATE));
404
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300405 state->mark = PyMem_New(void *, pattern->groups * 2);
406 if (!state->mark) {
407 PyErr_NoMemory();
408 goto err;
409 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000410 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000411 state->lastindex = -1;
412
Benjamin Petersone48944b2012-03-07 14:50:25 -0600413 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300414 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000415 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600416 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000417
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300418 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600419 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200420 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600421 goto err;
422 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300423 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600424 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200425 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600426 goto err;
427 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000429 /* adjust boundaries */
430 if (start < 0)
431 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000432 else if (start > length)
433 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000434
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000435 if (end < 0)
436 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000437 else if (end > length)
438 end = length;
439
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300440 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000441 state->charsize = charsize;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200442 state->match_all = 0;
443 state->must_advance = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000444
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000445 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000446
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000447 state->start = (void*) ((char*) ptr + start * state->charsize);
448 state->end = (void*) ((char*) ptr + end * state->charsize);
449
450 Py_INCREF(string);
451 state->string = string;
452 state->pos = start;
453 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000454
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000455 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600456 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300457 PyMem_Del(state->mark);
458 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600459 if (state->buffer.buf)
460 PyBuffer_Release(&state->buffer);
461 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000462}
463
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000464LOCAL(void)
465state_fini(SRE_STATE* state)
466{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600467 if (state->buffer.buf)
468 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000469 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000470 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300471 PyMem_Del(state->mark);
472 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000473}
474
/* Convert a pointer into the subject string (`member`) to a character
 * offset from the start of the string: byte distance from
 * state->beginning divided by the character width. */
#define STATE_OFFSET(state, member) \
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
478
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000479LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300480getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300481 PyObject* string, Py_ssize_t start, Py_ssize_t end)
482{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300483 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300484 if (PyBytes_CheckExact(string) &&
485 start == 0 && end == PyBytes_GET_SIZE(string)) {
486 Py_INCREF(string);
487 return string;
488 }
489 return PyBytes_FromStringAndSize(
490 (const char *)ptr + start, end - start);
491 }
492 else {
493 return PyUnicode_Substring(string, start, end);
494 }
495}
496
497LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000498state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000499{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000500 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000501
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000502 index = (index - 1) * 2;
503
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000504 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000505 if (empty)
506 /* want empty string */
507 i = j = 0;
508 else {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200509 Py_RETURN_NONE;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000510 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000511 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000512 i = STATE_OFFSET(state, state->mark[index]);
513 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000514 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000515
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300516 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000517}
518
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000519static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100520pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000521{
522 switch (status) {
523 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400524 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000525 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400526 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000527 "maximum recursion limit exceeded"
528 );
529 break;
530 case SRE_ERROR_MEMORY:
531 PyErr_NoMemory();
532 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000533 case SRE_ERROR_INTERRUPTED:
534 /* An exception has already been raised, so let it fly */
535 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000536 default:
537 /* other error codes indicate compiler/engine bugs */
538 PyErr_SetString(
539 PyExc_RuntimeError,
540 "internal error in regular expression engine"
541 );
542 }
543}
544
Guido van Rossumb700df92000-03-31 14:59:30 +0000545static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000546pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000547{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000548 if (self->weakreflist != NULL)
549 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000550 Py_XDECREF(self->pattern);
551 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000552 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000553 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000554}
555
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300556LOCAL(Py_ssize_t)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200557sre_match(SRE_STATE* state, SRE_CODE* pattern)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300558{
559 if (state->charsize == 1)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200560 return sre_ucs1_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300561 if (state->charsize == 2)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200562 return sre_ucs2_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300563 assert(state->charsize == 4);
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200564 return sre_ucs4_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300565}
566
567LOCAL(Py_ssize_t)
568sre_search(SRE_STATE* state, SRE_CODE* pattern)
569{
570 if (state->charsize == 1)
571 return sre_ucs1_search(state, pattern);
572 if (state->charsize == 2)
573 return sre_ucs2_search(state, pattern);
574 assert(state->charsize == 4);
575 return sre_ucs4_search(state, pattern);
576}
577
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300578/*[clinic input]
579_sre.SRE_Pattern.match
580
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200581 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300582 pos: Py_ssize_t = 0
583 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300584
585Matches zero or more characters at the beginning of the string.
586[clinic start generated code]*/
587
Larry Hastings16c51912014-01-07 11:53:01 -0800588static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300589_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200590 Py_ssize_t pos, Py_ssize_t endpos)
591/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800592{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000593 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100594 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300595 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000596
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300597 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000598 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000599
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000600 state.ptr = state.start;
601
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000602 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
603
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200604 status = sre_match(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000606 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300607 if (PyErr_Occurred()) {
608 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000609 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300610 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000611
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300612 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000613 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300614 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000615}
616
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300617/*[clinic input]
618_sre.SRE_Pattern.fullmatch
619
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200620 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300621 pos: Py_ssize_t = 0
622 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300623
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300624Matches against all of the string.
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300625[clinic start generated code]*/
626
627static PyObject *
628_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200629 Py_ssize_t pos, Py_ssize_t endpos)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300630/*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200631{
632 SRE_STATE state;
633 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300634 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200635
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300636 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200637 return NULL;
638
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200639 state.ptr = state.start;
640
641 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
642
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200643 state.match_all = 1;
644 status = sre_match(&state, PatternObject_GetCode(self));
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200645
646 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300647 if (PyErr_Occurred()) {
648 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200649 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300650 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200651
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300652 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200653 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300654 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200655}
656
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300657/*[clinic input]
658_sre.SRE_Pattern.search
659
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200660 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300661 pos: Py_ssize_t = 0
662 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300663
664Scan through string looking for a match, and return a corresponding match object instance.
665
666Return None if no position in the string matches.
667[clinic start generated code]*/
668
669static PyObject *
670_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200671 Py_ssize_t pos, Py_ssize_t endpos)
672/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000673{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000674 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100675 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300676 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000677
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300678 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000679 return NULL;
680
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000681 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
682
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300683 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000684
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000685 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
686
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300687 if (PyErr_Occurred()) {
688 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000689 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300690 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000691
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300692 match = pattern_new_match(self, &state, status);
693 state_fini(&state);
694 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000695}
696
697static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200698call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000699{
700 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000701 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000702 PyObject* func;
703 PyObject* result;
704
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000705 if (!args)
706 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000707 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000708 if (!name)
709 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000710 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000711 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000712 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000713 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000714 func = PyObject_GetAttrString(mod, function);
715 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000716 if (!func)
717 return NULL;
718 result = PyObject_CallObject(func, args);
719 Py_DECREF(func);
720 Py_DECREF(args);
721 return result;
722}
723
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300724/*[clinic input]
725_sre.SRE_Pattern.findall
726
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200727 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300728 pos: Py_ssize_t = 0
729 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300730
731Return a list of all non-overlapping matches of pattern in string.
732[clinic start generated code]*/
733
734static PyObject *
735_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200736 Py_ssize_t pos, Py_ssize_t endpos)
737/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000738{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 SRE_STATE state;
740 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100741 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000742 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000743
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300744 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000746
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000747 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000748 if (!list) {
749 state_fini(&state);
750 return NULL;
751 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000752
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000753 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000754
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000755 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000756
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000757 state_reset(&state);
758
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000759 state.ptr = state.start;
760
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300761 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300762 if (PyErr_Occurred())
763 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000764
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000765 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000766 if (status == 0)
767 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000768 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000769 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000770 }
Tim Peters3d563502006-01-21 02:47:53 +0000771
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000772 /* don't bother to build a match object */
773 switch (self->groups) {
774 case 0:
775 b = STATE_OFFSET(&state, state.start);
776 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300777 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300778 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000779 if (!item)
780 goto error;
781 break;
782 case 1:
783 item = state_getslice(&state, 1, string, 1);
784 if (!item)
785 goto error;
786 break;
787 default:
788 item = PyTuple_New(self->groups);
789 if (!item)
790 goto error;
791 for (i = 0; i < self->groups; i++) {
792 PyObject* o = state_getslice(&state, i+1, string, 1);
793 if (!o) {
794 Py_DECREF(item);
795 goto error;
796 }
797 PyTuple_SET_ITEM(item, i, o);
798 }
799 break;
800 }
801
802 status = PyList_Append(list, item);
803 Py_DECREF(item);
804 if (status < 0)
805 goto error;
806
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200807 state.must_advance = (state.ptr == state.start);
808 state.start = state.ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000809 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000810
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000811 state_fini(&state);
812 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000813
814error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000815 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000816 state_fini(&state);
817 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000818
Guido van Rossumb700df92000-03-31 14:59:30 +0000819}
820
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300821/*[clinic input]
822_sre.SRE_Pattern.finditer
823
824 string: object
825 pos: Py_ssize_t = 0
826 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
827
828Return an iterator over all non-overlapping matches for the RE pattern in string.
829
830For each match, the iterator returns a match object.
831[clinic start generated code]*/
832
833static PyObject *
834_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
835 Py_ssize_t pos, Py_ssize_t endpos)
836/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000837{
838 PyObject* scanner;
839 PyObject* search;
840 PyObject* iterator;
841
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300842 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000843 if (!scanner)
844 return NULL;
845
846 search = PyObject_GetAttrString(scanner, "search");
847 Py_DECREF(scanner);
848 if (!search)
849 return NULL;
850
851 iterator = PyCallIter_New(search, Py_None);
852 Py_DECREF(search);
853
854 return iterator;
855}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000856
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300857/*[clinic input]
858_sre.SRE_Pattern.scanner
859
860 string: object
861 pos: Py_ssize_t = 0
862 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
863
864[clinic start generated code]*/
865
866static PyObject *
867_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
868 Py_ssize_t pos, Py_ssize_t endpos)
869/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
870{
871 return pattern_scanner(self, string, pos, endpos);
872}
873
874/*[clinic input]
875_sre.SRE_Pattern.split
876
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200877 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300878 maxsplit: Py_ssize_t = 0
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300879
880Split string by the occurrences of pattern.
881[clinic start generated code]*/
882
883static PyObject *
884_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200885 Py_ssize_t maxsplit)
886/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000887{
888 SRE_STATE state;
889 PyObject* list;
890 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100891 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000892 Py_ssize_t n;
893 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000894 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000895
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200896 assert(self->codesize != 0);
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200897
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300898 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000899 return NULL;
900
901 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000902 if (!list) {
903 state_fini(&state);
904 return NULL;
905 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000906
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000907 n = 0;
908 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000909
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000910 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000911
912 state_reset(&state);
913
914 state.ptr = state.start;
915
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300916 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300917 if (PyErr_Occurred())
918 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000919
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000920 if (status <= 0) {
921 if (status == 0)
922 break;
923 pattern_error(status);
924 goto error;
925 }
Tim Peters3d563502006-01-21 02:47:53 +0000926
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000927 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300928 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000929 string, STATE_OFFSET(&state, last),
930 STATE_OFFSET(&state, state.start)
931 );
932 if (!item)
933 goto error;
934 status = PyList_Append(list, item);
935 Py_DECREF(item);
936 if (status < 0)
937 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000938
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000939 /* add groups (if any) */
940 for (i = 0; i < self->groups; i++) {
941 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000942 if (!item)
943 goto error;
944 status = PyList_Append(list, item);
945 Py_DECREF(item);
946 if (status < 0)
947 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000948 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000949
950 n = n + 1;
Serhiy Storchakafbb490f2018-01-04 11:06:13 +0200951 state.must_advance = (state.ptr == state.start);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000952 last = state.start = state.ptr;
953
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000954 }
955
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000956 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300957 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000958 string, STATE_OFFSET(&state, last), state.endpos
959 );
960 if (!item)
961 goto error;
962 status = PyList_Append(list, item);
963 Py_DECREF(item);
964 if (status < 0)
965 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000966
967 state_fini(&state);
968 return list;
969
970error:
971 Py_DECREF(list);
972 state_fini(&state);
973 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000974
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000975}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000976
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000977static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000978pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000979 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000980{
981 SRE_STATE state;
982 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300983 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000984 PyObject* item;
985 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000986 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000987 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100988 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000989 Py_ssize_t n;
990 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300991 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000992 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600993 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000994
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000995 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000996 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000997 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000998 Py_INCREF(filter);
999 filter_is_callable = 1;
1000 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001001 /* if not callable, check if it's a literal string */
1002 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001003 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001004 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001006 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001007 if (charsize == 1)
1008 literal = memchr(ptr, '\\', n) == NULL;
1009 else
1010 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001011 } else {
1012 PyErr_Clear();
1013 literal = 0;
1014 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001015 if (view.buf)
1016 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001017 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001018 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001019 Py_INCREF(filter);
1020 filter_is_callable = 0;
1021 } else {
1022 /* not a literal; hand it over to the template compiler */
1023 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001024 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001025 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001026 );
1027 if (!filter)
1028 return NULL;
1029 filter_is_callable = PyCallable_Check(filter);
1030 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001031 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001032
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001033 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001034 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001035 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001036 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001037
1038 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001039 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001040 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001041 state_fini(&state);
1042 return NULL;
1043 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001044
1045 n = i = 0;
1046
1047 while (!count || n < count) {
1048
1049 state_reset(&state);
1050
1051 state.ptr = state.start;
1052
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001053 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001054 if (PyErr_Occurred())
1055 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001056
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001057 if (status <= 0) {
1058 if (status == 0)
1059 break;
1060 pattern_error(status);
1061 goto error;
1062 }
Tim Peters3d563502006-01-21 02:47:53 +00001063
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001064 b = STATE_OFFSET(&state, state.start);
1065 e = STATE_OFFSET(&state, state.ptr);
1066
1067 if (i < b) {
1068 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001069 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001070 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001071 if (!item)
1072 goto error;
1073 status = PyList_Append(list, item);
1074 Py_DECREF(item);
1075 if (status < 0)
1076 goto error;
1077
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001078 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001079
1080 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001081 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001082 match = pattern_new_match(self, &state, 1);
1083 if (!match)
1084 goto error;
Victor Stinner7bfb42d2016-12-05 17:04:32 +01001085 item = PyObject_CallFunctionObjArgs(filter, match, NULL);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001086 Py_DECREF(match);
1087 if (!item)
1088 goto error;
1089 } else {
1090 /* filter is literal string */
1091 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001092 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001093 }
1094
1095 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001096 if (item != Py_None) {
1097 status = PyList_Append(list, item);
1098 Py_DECREF(item);
1099 if (status < 0)
1100 goto error;
1101 }
Tim Peters3d563502006-01-21 02:47:53 +00001102
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001103 i = e;
1104 n = n + 1;
Serhiy Storchakafbb490f2018-01-04 11:06:13 +02001105 state.must_advance = (state.ptr == state.start);
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001106 state.start = state.ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001107 }
1108
1109 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001110 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001111 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001112 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001113 if (!item)
1114 goto error;
1115 status = PyList_Append(list, item);
1116 Py_DECREF(item);
1117 if (status < 0)
1118 goto error;
1119 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001120
1121 state_fini(&state);
1122
Guido van Rossum4e173842001-12-07 04:25:10 +00001123 Py_DECREF(filter);
1124
Fredrik Lundhdac58492001-10-21 21:48:30 +00001125 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001126 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001127 if (!joiner) {
1128 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001129 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001130 }
1131 if (PyList_GET_SIZE(list) == 0) {
1132 Py_DECREF(list);
1133 item = joiner;
1134 }
1135 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001136 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001137 item = _PyBytes_Join(joiner, list);
1138 else
1139 item = PyUnicode_Join(joiner, list);
1140 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001141 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001142 if (!item)
1143 return NULL;
1144 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001145
1146 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001147 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001148
1149 return item;
1150
1151error:
1152 Py_DECREF(list);
1153 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001154 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001155 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001156
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001157}
1158
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001159/*[clinic input]
1160_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001161
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001162 repl: object
1163 string: object
1164 count: Py_ssize_t = 0
1165
1166Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1167[clinic start generated code]*/
1168
1169static PyObject *
1170_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1171 PyObject *string, Py_ssize_t count)
1172/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1173{
1174 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001175}
1176
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001177/*[clinic input]
1178_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001179
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001180 repl: object
1181 string: object
1182 count: Py_ssize_t = 0
1183
1184Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1185[clinic start generated code]*/
1186
1187static PyObject *
1188_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1189 PyObject *string, Py_ssize_t count)
1190/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1191{
1192 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001193}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001194
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001195/*[clinic input]
1196_sre.SRE_Pattern.__copy__
1197
1198[clinic start generated code]*/
1199
1200static PyObject *
1201_sre_SRE_Pattern___copy___impl(PatternObject *self)
1202/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001203{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001204 Py_INCREF(self);
1205 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001206}
1207
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001208/*[clinic input]
1209_sre.SRE_Pattern.__deepcopy__
1210
1211 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001212 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001213
1214[clinic start generated code]*/
1215
1216static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001217_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1218/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001219{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001220 Py_INCREF(self);
1221 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001222}
1223
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001224static PyObject *
1225pattern_repr(PatternObject *obj)
1226{
1227 static const struct {
1228 const char *name;
1229 int value;
1230 } flag_names[] = {
1231 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1232 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1233 {"re.LOCALE", SRE_FLAG_LOCALE},
1234 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1235 {"re.DOTALL", SRE_FLAG_DOTALL},
1236 {"re.UNICODE", SRE_FLAG_UNICODE},
1237 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1238 {"re.DEBUG", SRE_FLAG_DEBUG},
1239 {"re.ASCII", SRE_FLAG_ASCII},
1240 };
1241 PyObject *result = NULL;
1242 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001243 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001244 int flags = obj->flags;
1245
1246 /* Omit re.UNICODE for valid string patterns. */
1247 if (obj->isbytes == 0 &&
1248 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1249 SRE_FLAG_UNICODE)
1250 flags &= ~SRE_FLAG_UNICODE;
1251
1252 flag_items = PyList_New(0);
1253 if (!flag_items)
1254 return NULL;
1255
1256 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1257 if (flags & flag_names[i].value) {
1258 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1259 if (!item)
1260 goto done;
1261
1262 if (PyList_Append(flag_items, item) < 0) {
1263 Py_DECREF(item);
1264 goto done;
1265 }
1266 Py_DECREF(item);
1267 flags &= ~flag_names[i].value;
1268 }
1269 }
1270 if (flags) {
1271 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1272 if (!item)
1273 goto done;
1274
1275 if (PyList_Append(flag_items, item) < 0) {
1276 Py_DECREF(item);
1277 goto done;
1278 }
1279 Py_DECREF(item);
1280 }
1281
1282 if (PyList_Size(flag_items) > 0) {
1283 PyObject *flags_result;
1284 PyObject *sep = PyUnicode_FromString("|");
1285 if (!sep)
1286 goto done;
1287 flags_result = PyUnicode_Join(sep, flag_items);
1288 Py_DECREF(sep);
1289 if (!flags_result)
1290 goto done;
1291 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1292 obj->pattern, flags_result);
1293 Py_DECREF(flags_result);
1294 }
1295 else {
1296 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1297 }
1298
1299done:
1300 Py_DECREF(flag_items);
1301 return result;
1302}
1303
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03001304PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001305
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001306/* PatternObject's 'groupindex' method. */
1307static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02001308pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001309{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001310 if (self->groupindex == NULL)
1311 return PyDict_New();
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001312 return PyDictProxy_New(self->groupindex);
1313}
1314
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001315static int _validate(PatternObject *self); /* Forward */
1316
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001317/*[clinic input]
1318_sre.compile
1319
1320 pattern: object
1321 flags: int
1322 code: object(subclass_of='&PyList_Type')
1323 groups: Py_ssize_t
Victor Stinner726a57d2016-11-22 23:04:39 +01001324 groupindex: object(subclass_of='&PyDict_Type')
1325 indexgroup: object(subclass_of='&PyTuple_Type')
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001326
1327[clinic start generated code]*/
1328
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001329static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001330_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001331 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1332 PyObject *indexgroup)
Victor Stinner726a57d2016-11-22 23:04:39 +01001333/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001334{
1335 /* "compile" pattern descriptor to pattern object */
1336
1337 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001338 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001339
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001340 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001341 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001342 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1343 if (!self)
1344 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001345 self->weakreflist = NULL;
1346 self->pattern = NULL;
1347 self->groupindex = NULL;
1348 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001349
1350 self->codesize = n;
1351
1352 for (i = 0; i < n; i++) {
1353 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001354 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001355 self->code[i] = (SRE_CODE) value;
1356 if ((unsigned long) self->code[i] != value) {
1357 PyErr_SetString(PyExc_OverflowError,
1358 "regular expression code size limit exceeded");
1359 break;
1360 }
1361 }
1362
1363 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001364 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001365 return NULL;
1366 }
1367
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001368 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001369 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001370 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001371 else {
1372 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001373 int charsize;
1374 Py_buffer view;
1375 view.buf = NULL;
1376 if (!getstring(pattern, &p_length, &self->isbytes,
1377 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001378 Py_DECREF(self);
1379 return NULL;
1380 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001381 if (view.buf)
1382 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001383 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001384
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001385 Py_INCREF(pattern);
1386 self->pattern = pattern;
1387
1388 self->flags = flags;
1389
1390 self->groups = groups;
1391
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001392 if (PyDict_GET_SIZE(groupindex) > 0) {
1393 Py_INCREF(groupindex);
1394 self->groupindex = groupindex;
1395 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1396 Py_INCREF(indexgroup);
1397 self->indexgroup = indexgroup;
1398 }
1399 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001400
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001401 if (!_validate(self)) {
1402 Py_DECREF(self);
1403 return NULL;
1404 }
1405
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001406 return (PyObject*) self;
1407}
1408
Guido van Rossumb700df92000-03-31 14:59:30 +00001409/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001410/* Code validation */
1411
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
1415
1416 The nice thing about the generated code is that it is position-independent:
1417 all jumps are relative jumps forward. Also, jumps don't cross each other:
1418 the target of a later jump is always earlier than the target of an earlier
1419 jump. IOW, this is okay:
1420
1421 J---------J-------T--------T
1422 \ \_____/ /
1423 \______________________/
1424
1425 but this is not:
1426
1427 J---------J-------T--------T
1428 \_________\_____/ /
1429 \____________/
1430
Serhiy Storchakaefa5a392013-10-27 08:04:58 +02001431 It also helps that SRE_CODE is always an unsigned type.
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001432*/
1433
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf() argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the enclosing function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on local variables of the function that uses them:
   `code` (read cursor, advanced by one word), `end` (one past the last
   readable word) and `op` / `arg` / `skip` (receive the word that was read).
   Each of them FAILs when the cursor is already at or beyond `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL unless (skip - adj) words are available between
   the skip word and `end`.  `adj` is parenthesized so that an expression
   argument is subtracted as a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001474
1475static int
1476_validate_charset(SRE_CODE *code, SRE_CODE *end)
1477{
1478 /* Some variables are manipulated by the macros above */
1479 SRE_CODE op;
1480 SRE_CODE arg;
1481 SRE_CODE offset;
1482 int i;
1483
1484 while (code < end) {
1485 GET_OP;
1486 switch (op) {
1487
1488 case SRE_OP_NEGATE:
1489 break;
1490
1491 case SRE_OP_LITERAL:
1492 GET_ARG;
1493 break;
1494
1495 case SRE_OP_RANGE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001496 case SRE_OP_RANGE_UNI_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001497 GET_ARG;
1498 GET_ARG;
1499 break;
1500
1501 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001502 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001503 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001504 FAIL;
1505 code += offset;
1506 break;
1507
1508 case SRE_OP_BIGCHARSET:
1509 GET_ARG; /* Number of blocks */
1510 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001511 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001512 FAIL;
1513 /* Make sure that each byte points to a valid block */
1514 for (i = 0; i < 256; i++) {
1515 if (((unsigned char *)code)[i] >= arg)
1516 FAIL;
1517 }
1518 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001519 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001520 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001521 FAIL;
1522 code += offset;
1523 break;
1524
1525 case SRE_OP_CATEGORY:
1526 GET_ARG;
1527 switch (arg) {
1528 case SRE_CATEGORY_DIGIT:
1529 case SRE_CATEGORY_NOT_DIGIT:
1530 case SRE_CATEGORY_SPACE:
1531 case SRE_CATEGORY_NOT_SPACE:
1532 case SRE_CATEGORY_WORD:
1533 case SRE_CATEGORY_NOT_WORD:
1534 case SRE_CATEGORY_LINEBREAK:
1535 case SRE_CATEGORY_NOT_LINEBREAK:
1536 case SRE_CATEGORY_LOC_WORD:
1537 case SRE_CATEGORY_LOC_NOT_WORD:
1538 case SRE_CATEGORY_UNI_DIGIT:
1539 case SRE_CATEGORY_UNI_NOT_DIGIT:
1540 case SRE_CATEGORY_UNI_SPACE:
1541 case SRE_CATEGORY_UNI_NOT_SPACE:
1542 case SRE_CATEGORY_UNI_WORD:
1543 case SRE_CATEGORY_UNI_NOT_WORD:
1544 case SRE_CATEGORY_UNI_LINEBREAK:
1545 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1546 break;
1547 default:
1548 FAIL;
1549 }
1550 break;
1551
1552 default:
1553 FAIL;
1554
1555 }
1556 }
1557
1558 return 1;
1559}
1560
1561static int
1562_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1563{
1564 /* Some variables are manipulated by the macros above */
1565 SRE_CODE op;
1566 SRE_CODE arg;
1567 SRE_CODE skip;
1568
1569 VTRACE(("code=%p, end=%p\n", code, end));
1570
1571 if (code > end)
1572 FAIL;
1573
1574 while (code < end) {
1575 GET_OP;
1576 switch (op) {
1577
1578 case SRE_OP_MARK:
1579 /* We don't check whether marks are properly nested; the
1580 sre_match() code is robust even if they don't, and the worst
1581 you can get is nonsensical match results. */
1582 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001583 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001584 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1585 FAIL;
1586 }
1587 break;
1588
1589 case SRE_OP_LITERAL:
1590 case SRE_OP_NOT_LITERAL:
1591 case SRE_OP_LITERAL_IGNORE:
1592 case SRE_OP_NOT_LITERAL_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001593 case SRE_OP_LITERAL_UNI_IGNORE:
1594 case SRE_OP_NOT_LITERAL_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001595 case SRE_OP_LITERAL_LOC_IGNORE:
1596 case SRE_OP_NOT_LITERAL_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001597 GET_ARG;
1598 /* The arg is just a character, nothing to check */
1599 break;
1600
1601 case SRE_OP_SUCCESS:
1602 case SRE_OP_FAILURE:
1603 /* Nothing to check; these normally end the matching process */
1604 break;
1605
1606 case SRE_OP_AT:
1607 GET_ARG;
1608 switch (arg) {
1609 case SRE_AT_BEGINNING:
1610 case SRE_AT_BEGINNING_STRING:
1611 case SRE_AT_BEGINNING_LINE:
1612 case SRE_AT_END:
1613 case SRE_AT_END_LINE:
1614 case SRE_AT_END_STRING:
1615 case SRE_AT_BOUNDARY:
1616 case SRE_AT_NON_BOUNDARY:
1617 case SRE_AT_LOC_BOUNDARY:
1618 case SRE_AT_LOC_NON_BOUNDARY:
1619 case SRE_AT_UNI_BOUNDARY:
1620 case SRE_AT_UNI_NON_BOUNDARY:
1621 break;
1622 default:
1623 FAIL;
1624 }
1625 break;
1626
1627 case SRE_OP_ANY:
1628 case SRE_OP_ANY_ALL:
1629 /* These have no operands */
1630 break;
1631
1632 case SRE_OP_IN:
1633 case SRE_OP_IN_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001634 case SRE_OP_IN_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001635 case SRE_OP_IN_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001636 GET_SKIP;
1637 /* Stop 1 before the end; we check the FAILURE below */
1638 if (!_validate_charset(code, code+skip-2))
1639 FAIL;
1640 if (code[skip-2] != SRE_OP_FAILURE)
1641 FAIL;
1642 code += skip-1;
1643 break;
1644
1645 case SRE_OP_INFO:
1646 {
1647 /* A minimal info field is
1648 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1649 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1650 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001651 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001652 SRE_CODE *newcode;
1653 GET_SKIP;
1654 newcode = code+skip-1;
1655 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001656 GET_ARG;
1657 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001658 /* Check that only valid flags are present */
1659 if ((flags & ~(SRE_INFO_PREFIX |
1660 SRE_INFO_LITERAL |
1661 SRE_INFO_CHARSET)) != 0)
1662 FAIL;
1663 /* PREFIX and CHARSET are mutually exclusive */
1664 if ((flags & SRE_INFO_PREFIX) &&
1665 (flags & SRE_INFO_CHARSET))
1666 FAIL;
1667 /* LITERAL implies PREFIX */
1668 if ((flags & SRE_INFO_LITERAL) &&
1669 !(flags & SRE_INFO_PREFIX))
1670 FAIL;
1671 /* Validate the prefix */
1672 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001673 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001674 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001675 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001676 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001677 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001678 FAIL;
1679 code += prefix_len;
1680 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001681 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001682 FAIL;
1683 /* Each overlap value should be < prefix_len */
1684 for (i = 0; i < prefix_len; i++) {
1685 if (code[i] >= prefix_len)
1686 FAIL;
1687 }
1688 code += prefix_len;
1689 }
1690 /* Validate the charset */
1691 if (flags & SRE_INFO_CHARSET) {
1692 if (!_validate_charset(code, newcode-1))
1693 FAIL;
1694 if (newcode[-1] != SRE_OP_FAILURE)
1695 FAIL;
1696 code = newcode;
1697 }
1698 else if (code != newcode) {
1699 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1700 FAIL;
1701 }
1702 }
1703 break;
1704
1705 case SRE_OP_BRANCH:
1706 {
1707 SRE_CODE *target = NULL;
1708 for (;;) {
1709 GET_SKIP;
1710 if (skip == 0)
1711 break;
1712 /* Stop 2 before the end; we check the JUMP below */
1713 if (!_validate_inner(code, code+skip-3, groups))
1714 FAIL;
1715 code += skip-3;
1716 /* Check that it ends with a JUMP, and that each JUMP
1717 has the same target */
1718 GET_OP;
1719 if (op != SRE_OP_JUMP)
1720 FAIL;
1721 GET_SKIP;
1722 if (target == NULL)
1723 target = code+skip-1;
1724 else if (code+skip-1 != target)
1725 FAIL;
1726 }
1727 }
1728 break;
1729
1730 case SRE_OP_REPEAT_ONE:
1731 case SRE_OP_MIN_REPEAT_ONE:
1732 {
1733 SRE_CODE min, max;
1734 GET_SKIP;
1735 GET_ARG; min = arg;
1736 GET_ARG; max = arg;
1737 if (min > max)
1738 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001739 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001740 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001741 if (!_validate_inner(code, code+skip-4, groups))
1742 FAIL;
1743 code += skip-4;
1744 GET_OP;
1745 if (op != SRE_OP_SUCCESS)
1746 FAIL;
1747 }
1748 break;
1749
1750 case SRE_OP_REPEAT:
1751 {
1752 SRE_CODE min, max;
1753 GET_SKIP;
1754 GET_ARG; min = arg;
1755 GET_ARG; max = arg;
1756 if (min > max)
1757 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001758 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001759 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001760 if (!_validate_inner(code, code+skip-3, groups))
1761 FAIL;
1762 code += skip-3;
1763 GET_OP;
1764 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1765 FAIL;
1766 }
1767 break;
1768
1769 case SRE_OP_GROUPREF:
1770 case SRE_OP_GROUPREF_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001771 case SRE_OP_GROUPREF_UNI_IGNORE:
1772 case SRE_OP_GROUPREF_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001773 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001774 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001775 FAIL;
1776 break;
1777
1778 case SRE_OP_GROUPREF_EXISTS:
1779 /* The regex syntax for this is: '(?(group)then|else)', where
1780 'group' is either an integer group number or a group name,
1781 'then' and 'else' are sub-regexes, and 'else' is optional. */
1782 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001783 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001784 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001785 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001786 code--; /* The skip is relative to the first arg! */
1787 /* There are two possibilities here: if there is both a 'then'
1788 part and an 'else' part, the generated code looks like:
1789
1790 GROUPREF_EXISTS
1791 <group>
1792 <skipyes>
1793 ...then part...
1794 JUMP
1795 <skipno>
1796 (<skipyes> jumps here)
1797 ...else part...
1798 (<skipno> jumps here)
1799
1800 If there is only a 'then' part, it looks like:
1801
1802 GROUPREF_EXISTS
1803 <group>
1804 <skip>
1805 ...then part...
1806 (<skip> jumps here)
1807
1808 There is no direct way to decide which it is, and we don't want
1809 to allow arbitrary jumps anywhere in the code; so we just look
1810 for a JUMP opcode preceding our skip target.
1811 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001812 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001813 code[skip-3] == SRE_OP_JUMP)
1814 {
1815 VTRACE(("both then and else parts present\n"));
1816 if (!_validate_inner(code+1, code+skip-3, groups))
1817 FAIL;
1818 code += skip-2; /* Position after JUMP, at <skipno> */
1819 GET_SKIP;
1820 if (!_validate_inner(code, code+skip-1, groups))
1821 FAIL;
1822 code += skip-1;
1823 }
1824 else {
1825 VTRACE(("only a then part present\n"));
1826 if (!_validate_inner(code+1, code+skip-1, groups))
1827 FAIL;
1828 code += skip-1;
1829 }
1830 break;
1831
1832 case SRE_OP_ASSERT:
1833 case SRE_OP_ASSERT_NOT:
1834 GET_SKIP;
1835 GET_ARG; /* 0 for lookahead, width for lookbehind */
1836 code--; /* Back up over arg to simplify math below */
1837 if (arg & 0x80000000)
1838 FAIL; /* Width too large */
1839 /* Stop 1 before the end; we check the SUCCESS below */
1840 if (!_validate_inner(code+1, code+skip-2, groups))
1841 FAIL;
1842 code += skip-2;
1843 GET_OP;
1844 if (op != SRE_OP_SUCCESS)
1845 FAIL;
1846 break;
1847
1848 default:
1849 FAIL;
1850
1851 }
1852 }
1853
1854 VTRACE(("okay\n"));
1855 return 1;
1856}
1857
1858static int
1859_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1860{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001861 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1862 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001863 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001864 return _validate_inner(code, end-1, groups);
1865}
1866
1867static int
1868_validate(PatternObject *self)
1869{
1870 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1871 {
1872 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1873 return 0;
1874 }
1875 else
1876 VTRACE(("Success!\n"));
1877 return 1;
1878}
1879
1880/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001881/* match methods */
1882
1883static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001884match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001885{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001886 Py_XDECREF(self->regs);
1887 Py_XDECREF(self->string);
1888 Py_DECREF(self->pattern);
1889 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001890}
1891
1892static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001893match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001894{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001895 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001896 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001897 Py_buffer view;
1898 PyObject *result;
1899 void* ptr;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001900 Py_ssize_t i, j;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001901
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001902 if (index < 0 || index >= self->groups) {
1903 /* raise IndexError if we were given a bad group number */
1904 PyErr_SetString(
1905 PyExc_IndexError,
1906 "no such group"
1907 );
1908 return NULL;
1909 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001910
Fredrik Lundh6f013982000-07-03 18:44:21 +00001911 index *= 2;
1912
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001913 if (self->string == Py_None || self->mark[index] < 0) {
1914 /* return default value if the string or group is undefined */
1915 Py_INCREF(def);
1916 return def;
1917 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001918
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001919 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001920 if (ptr == NULL)
1921 return NULL;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001922
1923 i = self->mark[index];
1924 j = self->mark[index+1];
1925 i = Py_MIN(i, length);
1926 j = Py_MIN(j, length);
1927 result = getslice(isbytes, ptr, self->string, i, j);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001928 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001929 PyBuffer_Release(&view);
1930 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001931}
1932
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001933static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001934match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001935{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001936 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001937
Guido van Rossumddefaf32007-01-14 03:31:43 +00001938 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001939 /* Default value */
1940 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001941
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001942 if (PyIndex_Check(index)) {
1943 return PyNumber_AsSsize_t(index, NULL);
1944 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001945
Fredrik Lundh6f013982000-07-03 18:44:21 +00001946 i = -1;
1947
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001948 if (self->pattern->groupindex) {
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001949 index = PyDict_GetItem(self->pattern->groupindex, index);
1950 if (index && PyLong_Check(index)) {
1951 i = PyLong_AsSsize_t(index);
1952 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001953 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001954
1955 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001956}
1957
1958static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001959match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001960{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001961 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001962}
1963
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001964/*[clinic input]
1965_sre.SRE_Match.expand
1966
1967 template: object
1968
1969Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1970[clinic start generated code]*/
1971
1972static PyObject *
1973_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1974/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001975{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001976 /* delegate to Python code */
1977 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001978 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001979 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001980 );
1981}
1982
1983static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001984match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001985{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001986 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001987 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001988
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001989 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001990
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001991 switch (size) {
1992 case 0:
Serhiy Storchakaba85d692017-03-30 09:09:41 +03001993 result = match_getslice(self, _PyLong_Zero, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001994 break;
1995 case 1:
1996 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1997 break;
1998 default:
1999 /* fetch multiple items */
2000 result = PyTuple_New(size);
2001 if (!result)
2002 return NULL;
2003 for (i = 0; i < size; i++) {
2004 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002005 self, PyTuple_GET_ITEM(args, i), Py_None
2006 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002007 if (!item) {
2008 Py_DECREF(result);
2009 return NULL;
2010 }
2011 PyTuple_SET_ITEM(result, i, item);
2012 }
2013 break;
2014 }
2015 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002016}
2017
Eric V. Smith605bdae2016-09-11 08:55:43 -04002018static PyObject*
2019match_getitem(MatchObject* self, PyObject* name)
2020{
2021 return match_getslice(self, name, Py_None);
2022}
2023
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002024/*[clinic input]
2025_sre.SRE_Match.groups
2026
2027 default: object = None
2028 Is used for groups that did not participate in the match.
2029
2030Return a tuple containing all the subgroups of the match, from 1.
2031[clinic start generated code]*/
2032
2033static PyObject *
2034_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2035/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002036{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002037 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002038 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002039
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002040 result = PyTuple_New(self->groups-1);
2041 if (!result)
2042 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002043
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002044 for (index = 1; index < self->groups; index++) {
2045 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002046 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002047 if (!item) {
2048 Py_DECREF(result);
2049 return NULL;
2050 }
2051 PyTuple_SET_ITEM(result, index-1, item);
2052 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002053
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002054 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002055}
2056
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002057/*[clinic input]
2058_sre.SRE_Match.groupdict
2059
2060 default: object = None
2061 Is used for groups that did not participate in the match.
2062
2063Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2064[clinic start generated code]*/
2065
2066static PyObject *
2067_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2068/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002069{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002070 PyObject *result;
2071 PyObject *key;
2072 PyObject *value;
2073 Py_ssize_t pos = 0;
2074 Py_hash_t hash;
Guido van Rossumb700df92000-03-31 14:59:30 +00002075
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002076 result = PyDict_New();
2077 if (!result || !self->pattern->groupindex)
2078 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002079
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002080 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002081 int status;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002082 Py_INCREF(key);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002083 value = match_getslice(self, key, default_value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002084 if (!value) {
2085 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002086 goto failed;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002087 }
2088 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002089 Py_DECREF(value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002090 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002091 if (status < 0)
2092 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002093 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002094
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002095 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002096
2097failed:
Fredrik Lundh770617b2001-01-14 15:06:11 +00002098 Py_DECREF(result);
2099 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002100}
2101
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002102/*[clinic input]
2103_sre.SRE_Match.start -> Py_ssize_t
2104
2105 group: object(c_default="NULL") = 0
2106 /
2107
2108Return index of the start of the substring matched by group.
2109[clinic start generated code]*/
2110
2111static Py_ssize_t
2112_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2113/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002114{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002115 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002116
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002117 if (index < 0 || index >= self->groups) {
2118 PyErr_SetString(
2119 PyExc_IndexError,
2120 "no such group"
2121 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002122 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002123 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002124
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002125 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002126 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002127}
2128
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002129/*[clinic input]
2130_sre.SRE_Match.end -> Py_ssize_t
2131
2132 group: object(c_default="NULL") = 0
2133 /
2134
2135Return index of the end of the substring matched by group.
2136[clinic start generated code]*/
2137
2138static Py_ssize_t
2139_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2140/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002141{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002142 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002143
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002144 if (index < 0 || index >= self->groups) {
2145 PyErr_SetString(
2146 PyExc_IndexError,
2147 "no such group"
2148 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002149 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002150 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002151
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002152 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002153 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002154}
2155
2156LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002157_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002158{
2159 PyObject* pair;
2160 PyObject* item;
2161
2162 pair = PyTuple_New(2);
2163 if (!pair)
2164 return NULL;
2165
Christian Heimes217cfd12007-12-02 14:31:20 +00002166 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002167 if (!item)
2168 goto error;
2169 PyTuple_SET_ITEM(pair, 0, item);
2170
Christian Heimes217cfd12007-12-02 14:31:20 +00002171 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002172 if (!item)
2173 goto error;
2174 PyTuple_SET_ITEM(pair, 1, item);
2175
2176 return pair;
2177
2178 error:
2179 Py_DECREF(pair);
2180 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002181}
2182
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002183/*[clinic input]
2184_sre.SRE_Match.span
2185
2186 group: object(c_default="NULL") = 0
2187 /
2188
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002189For match object m, return the 2-tuple (m.start(group), m.end(group)).
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002190[clinic start generated code]*/
2191
2192static PyObject *
2193_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002194/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002195{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002196 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002197
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002198 if (index < 0 || index >= self->groups) {
2199 PyErr_SetString(
2200 PyExc_IndexError,
2201 "no such group"
2202 );
2203 return NULL;
2204 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002205
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002206 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207 return _pair(self->mark[index*2], self->mark[index*2+1]);
2208}
2209
2210static PyObject*
2211match_regs(MatchObject* self)
2212{
2213 PyObject* regs;
2214 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002215 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002216
2217 regs = PyTuple_New(self->groups);
2218 if (!regs)
2219 return NULL;
2220
2221 for (index = 0; index < self->groups; index++) {
2222 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2223 if (!item) {
2224 Py_DECREF(regs);
2225 return NULL;
2226 }
2227 PyTuple_SET_ITEM(regs, index, item);
2228 }
2229
2230 Py_INCREF(regs);
2231 self->regs = regs;
2232
2233 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002234}
2235
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002236/*[clinic input]
2237_sre.SRE_Match.__copy__
2238
2239[clinic start generated code]*/
2240
2241static PyObject *
2242_sre_SRE_Match___copy___impl(MatchObject *self)
2243/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002244{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002245 Py_INCREF(self);
2246 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002247}
2248
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002249/*[clinic input]
2250_sre.SRE_Match.__deepcopy__
2251
2252 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002253 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002254
2255[clinic start generated code]*/
2256
2257static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002258_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2259/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002260{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002261 Py_INCREF(self);
2262 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002263}
2264
/* Docstring text for match objects. */
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002265PyDoc_STRVAR(match_doc,
2266"The result of re.match() and re.search().\n\
2267Match objects always have a boolean value of True.");
2268
/* Docstring text for the group() method.  The literal uses backslash line
   continuations, so whitespace at the start of the continuation lines is part
   of the string. */
2269PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002270"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002271 Return subgroup(s) of the match by indices or names.\n\
2272 For 0 returns the entire match.");
2273
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002274static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02002275match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
Guido van Rossumb700df92000-03-31 14:59:30 +00002276{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002277 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002278 return PyLong_FromSsize_t(self->lastindex);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002279 Py_RETURN_NONE;
Guido van Rossumb700df92000-03-31 14:59:30 +00002280}
2281
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002282static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02002283match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002284{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002285 if (self->pattern->indexgroup &&
2286 self->lastindex >= 0 &&
2287 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2288 {
2289 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2290 self->lastindex);
2291 Py_INCREF(result);
2292 return result;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002293 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002294 Py_RETURN_NONE;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002295}
2296
2297static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02002298match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002299{
2300 if (self->regs) {
2301 Py_INCREF(self->regs);
2302 return self->regs;
2303 } else
2304 return match_regs(self);
2305}
2306
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002307static PyObject *
2308match_repr(MatchObject *self)
2309{
2310 PyObject *result;
2311 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2312 if (group0 == NULL)
2313 return NULL;
2314 result = PyUnicode_FromFormat(
2315 "<%s object; span=(%d, %d), match=%.50R>",
2316 Py_TYPE(self)->tp_name,
2317 self->mark[0], self->mark[1], group0);
2318 Py_DECREF(group0);
2319 return result;
2320}
2321
2322
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002323static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002324pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002325{
2326 /* create match object (from state object) */
2327
2328 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002329 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002330 char* base;
2331 int n;
2332
2333 if (status > 0) {
2334
2335 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002336 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002337 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2338 2*(pattern->groups+1));
2339 if (!match)
2340 return NULL;
2341
2342 Py_INCREF(pattern);
2343 match->pattern = pattern;
2344
2345 Py_INCREF(state->string);
2346 match->string = state->string;
2347
2348 match->regs = NULL;
2349 match->groups = pattern->groups+1;
2350
2351 /* fill in group slices */
2352
2353 base = (char*) state->beginning;
2354 n = state->charsize;
2355
2356 match->mark[0] = ((char*) state->start - base) / n;
2357 match->mark[1] = ((char*) state->ptr - base) / n;
2358
2359 for (i = j = 0; i < pattern->groups; i++, j+=2)
2360 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2361 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2362 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2363 } else
2364 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2365
2366 match->pos = state->pos;
2367 match->endpos = state->endpos;
2368
2369 match->lastindex = state->lastindex;
2370
2371 return (PyObject*) match;
2372
2373 } else if (status == 0) {
2374
2375 /* no match */
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002376 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002377
2378 }
2379
2380 /* internal error */
2381 pattern_error(status);
2382 return NULL;
2383}
2384
2385
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002386/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002387/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002388
2389static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002390scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002391{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002392 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002393 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002394 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002395}
2396
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002397/*[clinic input]
2398_sre.SRE_Scanner.match
2399
2400[clinic start generated code]*/
2401
2402static PyObject *
2403_sre_SRE_Scanner_match_impl(ScannerObject *self)
2404/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002405{
2406 SRE_STATE* state = &self->state;
2407 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002408 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002409
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002410 if (state->start == NULL)
2411 Py_RETURN_NONE;
2412
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002413 state_reset(state);
2414
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002415 state->ptr = state->start;
2416
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002417 status = sre_match(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002418 if (PyErr_Occurred())
2419 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002420
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002421 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002422 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002423
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002424 if (status == 0)
2425 state->start = NULL;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002426 else {
2427 state->must_advance = (state->ptr == state->start);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002428 state->start = state->ptr;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002429 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002430
2431 return match;
2432}
2433
2434
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002435/*[clinic input]
2436_sre.SRE_Scanner.search
2437
2438[clinic start generated code]*/
2439
2440static PyObject *
2441_sre_SRE_Scanner_search_impl(ScannerObject *self)
2442/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002443{
2444 SRE_STATE* state = &self->state;
2445 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002446 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002447
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002448 if (state->start == NULL)
2449 Py_RETURN_NONE;
2450
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002451 state_reset(state);
2452
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002453 state->ptr = state->start;
2454
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002455 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002456 if (PyErr_Occurred())
2457 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002458
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002459 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002460 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002461
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002462 if (status == 0)
2463 state->start = NULL;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002464 else {
2465 state->must_advance = (state->ptr == state->start);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002466 state->start = state->ptr;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002467 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002468
2469 return match;
2470}
2471
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002472static PyObject *
2473pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002474{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002475 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002476
2477 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002478 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2479 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002480 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002481 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002482
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002483 /* create search state object */
2484 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2485 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002486 return NULL;
2487 }
2488
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002489 Py_INCREF(self);
2490 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002491
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002492 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002493}
2494
Victor Stinnerb44fb122016-11-21 16:35:08 +01002495static Py_hash_t
2496pattern_hash(PatternObject *self)
2497{
2498 Py_hash_t hash, hash2;
2499
2500 hash = PyObject_Hash(self->pattern);
2501 if (hash == -1) {
2502 return -1;
2503 }
2504
2505 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2506 hash ^= hash2;
2507
2508 hash ^= self->flags;
2509 hash ^= self->isbytes;
2510 hash ^= self->codesize;
2511
2512 if (hash == -1) {
2513 hash = -2;
2514 }
2515 return hash;
2516}
2517
2518static PyObject*
2519pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2520{
2521 PatternObject *left, *right;
2522 int cmp;
2523
2524 if (op != Py_EQ && op != Py_NE) {
2525 Py_RETURN_NOTIMPLEMENTED;
2526 }
2527
2528 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2529 Py_RETURN_NOTIMPLEMENTED;
2530 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002531
2532 if (lefto == righto) {
2533 /* a pattern is equal to itself */
2534 return PyBool_FromLong(op == Py_EQ);
2535 }
2536
Victor Stinnerb44fb122016-11-21 16:35:08 +01002537 left = (PatternObject *)lefto;
2538 right = (PatternObject *)righto;
2539
2540 cmp = (left->flags == right->flags
2541 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002542 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002543 if (cmp) {
2544 /* Compare the code and the pattern because the same pattern can
2545 produce different codes depending on the locale used to compile the
2546 pattern when the re.LOCALE flag is used. Don't compare groups,
2547 indexgroup nor groupindex: they are derivated from the pattern. */
2548 cmp = (memcmp(left->code, right->code,
2549 sizeof(left->code[0]) * left->codesize) == 0);
2550 }
2551 if (cmp) {
2552 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2553 Py_EQ);
2554 if (cmp < 0) {
2555 return NULL;
2556 }
2557 }
2558 if (op == Py_NE) {
2559 cmp = !cmp;
2560 }
2561 return PyBool_FromLong(cmp);
2562}
2563
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002564#include "clinic/_sre.c.h"
2565
2566static PyMethodDef pattern_methods[] = {
2567 _SRE_SRE_PATTERN_MATCH_METHODDEF
2568 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2569 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2570 _SRE_SRE_PATTERN_SUB_METHODDEF
2571 _SRE_SRE_PATTERN_SUBN_METHODDEF
2572 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2573 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2574 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2575 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2576 _SRE_SRE_PATTERN___COPY___METHODDEF
2577 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2578 {NULL, NULL}
2579};
2580
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002581static PyGetSetDef pattern_getset[] = {
2582 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2583 "A dictionary mapping group names to group numbers."},
2584 {NULL} /* Sentinel */
2585};
2586
2587#define PAT_OFF(x) offsetof(PatternObject, x)
2588static PyMemberDef pattern_members[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002589 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
2590 "The pattern string from which the RE object was compiled."},
2591 {"flags", T_INT, PAT_OFF(flags), READONLY,
2592 "The regex matching flags."},
2593 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
2594 "The number of capturing groups in the pattern."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002595 {NULL} /* Sentinel */
2596};
2597
2598static PyTypeObject Pattern_Type = {
2599 PyVarObject_HEAD_INIT(NULL, 0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002600 "re.Pattern",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002601 sizeof(PatternObject), sizeof(SRE_CODE),
2602 (destructor)pattern_dealloc, /* tp_dealloc */
2603 0, /* tp_print */
2604 0, /* tp_getattr */
2605 0, /* tp_setattr */
2606 0, /* tp_reserved */
2607 (reprfunc)pattern_repr, /* tp_repr */
2608 0, /* tp_as_number */
2609 0, /* tp_as_sequence */
2610 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002611 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002612 0, /* tp_call */
2613 0, /* tp_str */
2614 0, /* tp_getattro */
2615 0, /* tp_setattro */
2616 0, /* tp_as_buffer */
2617 Py_TPFLAGS_DEFAULT, /* tp_flags */
2618 pattern_doc, /* tp_doc */
2619 0, /* tp_traverse */
2620 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002621 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002622 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2623 0, /* tp_iter */
2624 0, /* tp_iternext */
2625 pattern_methods, /* tp_methods */
2626 pattern_members, /* tp_members */
2627 pattern_getset, /* tp_getset */
2628};
2629
Eric V. Smith605bdae2016-09-11 08:55:43 -04002630/* Match objects do not support length or assignment, but do support
2631 __getitem__. */
2632static PyMappingMethods match_as_mapping = {
2633 NULL,
2634 (binaryfunc)match_getitem,
2635 NULL
2636};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002637
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002638static PyMethodDef match_methods[] = {
2639 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2640 _SRE_SRE_MATCH_START_METHODDEF
2641 _SRE_SRE_MATCH_END_METHODDEF
2642 _SRE_SRE_MATCH_SPAN_METHODDEF
2643 _SRE_SRE_MATCH_GROUPS_METHODDEF
2644 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2645 _SRE_SRE_MATCH_EXPAND_METHODDEF
2646 _SRE_SRE_MATCH___COPY___METHODDEF
2647 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2648 {NULL, NULL}
2649};
2650
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002651static PyGetSetDef match_getset[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002652 {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2653 "The integer index of the last matched capturing group."},
2654 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2655 "The name of the last matched capturing group."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002656 {"regs", (getter)match_regs_get, (setter)NULL},
2657 {NULL}
2658};
2659
2660#define MATCH_OFF(x) offsetof(MatchObject, x)
2661static PyMemberDef match_members[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002662 {"string", T_OBJECT, MATCH_OFF(string), READONLY,
2663 "The string passed to match() or search()."},
2664 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY,
2665 "The regular expression object."},
2666 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
2667 "The index into the string at which the RE engine started looking for a match."},
2668 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
2669 "The index into the string beyond which the RE engine will not go."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002670 {NULL}
2671};
2672
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2675
2676static PyTypeObject Match_Type = {
2677 PyVarObject_HEAD_INIT(NULL,0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002678 "re.Match",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002679 sizeof(MatchObject), sizeof(Py_ssize_t),
2680 (destructor)match_dealloc, /* tp_dealloc */
2681 0, /* tp_print */
2682 0, /* tp_getattr */
2683 0, /* tp_setattr */
2684 0, /* tp_reserved */
2685 (reprfunc)match_repr, /* tp_repr */
2686 0, /* tp_as_number */
2687 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002688 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002689 0, /* tp_hash */
2690 0, /* tp_call */
2691 0, /* tp_str */
2692 0, /* tp_getattro */
2693 0, /* tp_setattro */
2694 0, /* tp_as_buffer */
2695 Py_TPFLAGS_DEFAULT, /* tp_flags */
2696 match_doc, /* tp_doc */
2697 0, /* tp_traverse */
2698 0, /* tp_clear */
2699 0, /* tp_richcompare */
2700 0, /* tp_weaklistoffset */
2701 0, /* tp_iter */
2702 0, /* tp_iternext */
2703 match_methods, /* tp_methods */
2704 match_members, /* tp_members */
2705 match_getset, /* tp_getset */
2706};
2707
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002708static PyMethodDef scanner_methods[] = {
2709 _SRE_SRE_SCANNER_MATCH_METHODDEF
2710 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2711 {NULL, NULL}
2712};
2713
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002714#define SCAN_OFF(x) offsetof(ScannerObject, x)
2715static PyMemberDef scanner_members[] = {
2716 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2717 {NULL} /* Sentinel */
2718};
2719
2720static PyTypeObject Scanner_Type = {
2721 PyVarObject_HEAD_INIT(NULL, 0)
2722 "_" SRE_MODULE ".SRE_Scanner",
2723 sizeof(ScannerObject), 0,
2724 (destructor)scanner_dealloc,/* tp_dealloc */
2725 0, /* tp_print */
2726 0, /* tp_getattr */
2727 0, /* tp_setattr */
2728 0, /* tp_reserved */
2729 0, /* tp_repr */
2730 0, /* tp_as_number */
2731 0, /* tp_as_sequence */
2732 0, /* tp_as_mapping */
2733 0, /* tp_hash */
2734 0, /* tp_call */
2735 0, /* tp_str */
2736 0, /* tp_getattro */
2737 0, /* tp_setattro */
2738 0, /* tp_as_buffer */
2739 Py_TPFLAGS_DEFAULT, /* tp_flags */
2740 0, /* tp_doc */
2741 0, /* tp_traverse */
2742 0, /* tp_clear */
2743 0, /* tp_richcompare */
2744 0, /* tp_weaklistoffset */
2745 0, /* tp_iter */
2746 0, /* tp_iternext */
2747 scanner_methods, /* tp_methods */
2748 scanner_members, /* tp_members */
2749 0, /* tp_getset */
2750};
2751
Guido van Rossumb700df92000-03-31 14:59:30 +00002752static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002753 _SRE_COMPILE_METHODDEF
2754 _SRE_GETCODESIZE_METHODDEF
Serhiy Storchaka6d336a02017-05-09 23:37:14 +03002755 _SRE_ASCII_ISCASED_METHODDEF
2756 _SRE_UNICODE_ISCASED_METHODDEF
Serhiy Storchaka7186cc22017-05-05 10:42:46 +03002757 _SRE_ASCII_TOLOWER_METHODDEF
2758 _SRE_UNICODE_TOLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002759 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002760};
2761
Martin v. Löwis1a214512008-06-11 05:26:20 +00002762static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002763 PyModuleDef_HEAD_INIT,
2764 "_" SRE_MODULE,
2765 NULL,
2766 -1,
2767 _functions,
2768 NULL,
2769 NULL,
2770 NULL,
2771 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002772};
2773
2774PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002775{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002776 PyObject* m;
2777 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002778 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002779
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002780 /* Patch object types */
2781 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2782 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002783 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002784
Martin v. Löwis1a214512008-06-11 05:26:20 +00002785 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002786 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002787 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002788 d = PyModule_GetDict(m);
2789
Christian Heimes217cfd12007-12-02 14:31:20 +00002790 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002791 if (x) {
2792 PyDict_SetItemString(d, "MAGIC", x);
2793 Py_DECREF(x);
2794 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002795
Christian Heimes217cfd12007-12-02 14:31:20 +00002796 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002797 if (x) {
2798 PyDict_SetItemString(d, "CODESIZE", x);
2799 Py_DECREF(x);
2800 }
2801
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002802 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2803 if (x) {
2804 PyDict_SetItemString(d, "MAXREPEAT", x);
2805 Py_DECREF(x);
2806 }
2807
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002808 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2809 if (x) {
2810 PyDict_SetItemString(d, "MAXGROUPS", x);
2811 Py_DECREF(x);
2812 }
2813
Neal Norwitzfe537132007-08-26 03:55:15 +00002814 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002815 if (x) {
2816 PyDict_SetItemString(d, "copyright", x);
2817 Py_DECREF(x);
2818 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002819 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002820}
2821
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002822/* vim:ts=4:sw=4:et
2823*/