blob: d67083037e51b85ff5f6cae71b4538449af996ee [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02007 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
Guido van Rossumb700df92000-03-31 14:59:30 +000026 *
Fredrik Lundh770617b2001-01-14 15:06:11 +000027 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
Guido van Rossumb700df92000-03-31 14:59:30 +000028 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000029 * This version of the SRE library can be redistributed under CNRI's
30 * Python 1.6 license. For any other use, please contact Secret Labs
31 * AB (info@pythonware.com).
32 *
Guido van Rossumb700df92000-03-31 14:59:30 +000033 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000034 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000035 * other compatibility work.
36 */
37
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020038static const char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000039 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063
Fredrik Lundh80946112000-06-29 18:03:25 +000064#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000065#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000066#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000067/* fastest possible local call under MSVC */
68#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000069#else
Benjamin Peterson791dc832017-04-20 23:52:19 -070070#define LOCAL(type) static inline type
Guido van Rossumb700df92000-03-31 14:59:30 +000071#endif
72
73/* error codes */
74#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000075#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000076#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000077#define SRE_ERROR_MEMORY -9 /* out of memory */
Christian Heimes2380ac72008-01-09 00:17:24 +000078#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000079
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000080#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000081#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000082#else
83#define TRACE(v)
84#endif
85
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000086/* -------------------------------------------------------------------- */
87/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000088
Fredrik Lundh436c3d582000-06-29 08:58:44 +000089#define SRE_IS_DIGIT(ch)\
Serhiy Storchaka5aa47442014-10-10 11:10:46 +030090 ((ch) < 128 && Py_ISDIGIT(ch))
Fredrik Lundh436c3d582000-06-29 08:58:44 +000091#define SRE_IS_SPACE(ch)\
Serhiy Storchaka5aa47442014-10-10 11:10:46 +030092 ((ch) < 128 && Py_ISSPACE(ch))
Fredrik Lundh436c3d582000-06-29 08:58:44 +000093#define SRE_IS_LINEBREAK(ch)\
Serhiy Storchaka5aa47442014-10-10 11:10:46 +030094 ((ch) == '\n')
Fredrik Lundh436c3d582000-06-29 08:58:44 +000095#define SRE_IS_WORD(ch)\
Serhiy Storchaka5aa47442014-10-10 11:10:46 +030096 ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +000097
/* Lowercase `ch` using ASCII-only rules.  Values of 128 and above are
   returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
102
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300103static unsigned int sre_upper_ascii(unsigned int ch)
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200104{
105 return ((ch) < 128 ? Py_TOUPPER(ch) : ch);
106}
107
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000108/* locale-specific character predicates */
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000109/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
110 * warnings when c's type supports only numbers < N+1 */
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000111#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000112#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
113
/* Lowercase `ch` with the C library's locale-aware tolower().  Only values
   below 256 are passed to tolower(); anything larger is returned as is. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower(ch);
}
118
/* Uppercase `ch` with the C library's locale-aware toupper().  Only values
   below 256 are passed to toupper(); anything larger is returned as is. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper(ch);
}
123
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000124/* unicode-specific character predicates */
125
Victor Stinner0058b862011-09-29 03:27:47 +0200126#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
127#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
128#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
129#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
130#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000131
/* Lowercase `ch` by delegating to Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
136
/* Uppercase `ch` by delegating to Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int uppered = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return uppered;
}
141
Guido van Rossumb700df92000-03-31 14:59:30 +0000142LOCAL(int)
143sre_category(SRE_CODE category, unsigned int ch)
144{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000145 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000146
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000147 case SRE_CATEGORY_DIGIT:
148 return SRE_IS_DIGIT(ch);
149 case SRE_CATEGORY_NOT_DIGIT:
150 return !SRE_IS_DIGIT(ch);
151 case SRE_CATEGORY_SPACE:
152 return SRE_IS_SPACE(ch);
153 case SRE_CATEGORY_NOT_SPACE:
154 return !SRE_IS_SPACE(ch);
155 case SRE_CATEGORY_WORD:
156 return SRE_IS_WORD(ch);
157 case SRE_CATEGORY_NOT_WORD:
158 return !SRE_IS_WORD(ch);
159 case SRE_CATEGORY_LINEBREAK:
160 return SRE_IS_LINEBREAK(ch);
161 case SRE_CATEGORY_NOT_LINEBREAK:
162 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000163
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000164 case SRE_CATEGORY_LOC_WORD:
165 return SRE_LOC_IS_WORD(ch);
166 case SRE_CATEGORY_LOC_NOT_WORD:
167 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000168
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000169 case SRE_CATEGORY_UNI_DIGIT:
170 return SRE_UNI_IS_DIGIT(ch);
171 case SRE_CATEGORY_UNI_NOT_DIGIT:
172 return !SRE_UNI_IS_DIGIT(ch);
173 case SRE_CATEGORY_UNI_SPACE:
174 return SRE_UNI_IS_SPACE(ch);
175 case SRE_CATEGORY_UNI_NOT_SPACE:
176 return !SRE_UNI_IS_SPACE(ch);
177 case SRE_CATEGORY_UNI_WORD:
178 return SRE_UNI_IS_WORD(ch);
179 case SRE_CATEGORY_UNI_NOT_WORD:
180 return !SRE_UNI_IS_WORD(ch);
181 case SRE_CATEGORY_UNI_LINEBREAK:
182 return SRE_UNI_IS_LINEBREAK(ch);
183 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
184 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000185 }
186 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000187}
188
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300189LOCAL(int)
190char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
191{
192 return ch == pattern
193 || (SRE_CODE) sre_lower_locale(ch) == pattern
194 || (SRE_CODE) sre_upper_locale(ch) == pattern;
195}
196
197
Guido van Rossumb700df92000-03-31 14:59:30 +0000198/* helpers */
199
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000200static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000201data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000202{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000203 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000204 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000205 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000206 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000207 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000208}
209
210static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000211data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000212{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000213 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000214 minsize = state->data_stack_base+size;
215 cursize = state->data_stack_size;
216 if (cursize < minsize) {
217 void* stack;
218 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300219 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000220 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000221 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000222 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000223 return SRE_ERROR_MEMORY;
224 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000225 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000226 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000227 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000228 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000229}
230
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000231/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000232
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300233#define SRE_CHAR Py_UCS1
234#define SIZEOF_SRE_CHAR 1
235#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300236#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000237
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300238/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000239
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300240#define SRE_CHAR Py_UCS2
241#define SIZEOF_SRE_CHAR 2
242#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300243#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000244
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300245/* generate 32-bit unicode version */
246
247#define SRE_CHAR Py_UCS4
248#define SIZEOF_SRE_CHAR 4
249#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300250#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000251
252/* -------------------------------------------------------------------- */
253/* factories and destructors */
254
255/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100256static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300257static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000258
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300259
260/*[clinic input]
261module _sre
262class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
263class _sre.SRE_Match "MatchObject *" "&Match_Type"
264class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
265[clinic start generated code]*/
266/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
267
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700268static PyTypeObject Pattern_Type;
269static PyTypeObject Match_Type;
270static PyTypeObject Scanner_Type;
271
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300272/*[clinic input]
273_sre.getcodesize -> int
274[clinic start generated code]*/
275
276static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300277_sre_getcodesize_impl(PyObject *module)
278/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000279{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300280 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000281}
282
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300283/*[clinic input]
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300284_sre.ascii_iscased -> bool
285
286 character: int
287 /
288
289[clinic start generated code]*/
290
291static int
292_sre_ascii_iscased_impl(PyObject *module, int character)
293/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
294{
295 unsigned int ch = (unsigned int)character;
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300296 return ch != sre_lower_ascii(ch) || ch != sre_upper_ascii(ch);
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300297}
298
299/*[clinic input]
300_sre.unicode_iscased -> bool
301
302 character: int
303 /
304
305[clinic start generated code]*/
306
307static int
308_sre_unicode_iscased_impl(PyObject *module, int character)
309/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
310{
311 unsigned int ch = (unsigned int)character;
312 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
313}
314
315/*[clinic input]
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300316_sre.ascii_tolower -> int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300317
318 character: int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300319 /
320
321[clinic start generated code]*/
322
323static int
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300324_sre_ascii_tolower_impl(PyObject *module, int character)
325/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000326{
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300327 return sre_lower_ascii(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000328}
329
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300330/*[clinic input]
331_sre.unicode_tolower -> int
332
333 character: int
334 /
335
336[clinic start generated code]*/
337
338static int
339_sre_unicode_tolower_impl(PyObject *module, int character)
340/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
341{
342 return sre_lower_unicode(character);
343}
344
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000345LOCAL(void)
346state_reset(SRE_STATE* state)
347{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000348 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000349 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000350
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000351 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000352 state->lastindex = -1;
353
354 state->repeat = NULL;
355
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000356 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000357}
358
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000359static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200360getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300361 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600362 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000363{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000364 /* given a python object, return a data pointer, a length (in
365 characters), and a character size. return NULL if the object
366 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000367
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000368 /* Unicode objects do not support the buffer API. So, get the data
369 directly instead. */
370 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200371 if (PyUnicode_READY(string) == -1)
372 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200373 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200374 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300375 *p_isbytes = 0;
376 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000377 }
378
Victor Stinner0058b862011-09-29 03:27:47 +0200379 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300380 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200381 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000383 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000384
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300385 *p_length = view->len;
386 *p_charsize = 1;
387 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000388
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300389 if (view->buf == NULL) {
390 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
391 PyBuffer_Release(view);
392 view->buf = NULL;
393 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000394 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300395 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000396}
397
398LOCAL(PyObject*)
399state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000400 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000401{
402 /* prepare state object */
403
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000404 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300405 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000406 void* ptr;
407
408 memset(state, 0, sizeof(SRE_STATE));
409
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300410 state->mark = PyMem_New(void *, pattern->groups * 2);
411 if (!state->mark) {
412 PyErr_NoMemory();
413 goto err;
414 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000415 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000416 state->lastindex = -1;
417
Benjamin Petersone48944b2012-03-07 14:50:25 -0600418 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300419 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000420 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600421 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000422
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300423 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600424 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200425 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600426 goto err;
427 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300428 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600429 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200430 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600431 goto err;
432 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000433
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000434 /* adjust boundaries */
435 if (start < 0)
436 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000437 else if (start > length)
438 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000439
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000440 if (end < 0)
441 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000442 else if (end > length)
443 end = length;
444
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300445 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000446 state->charsize = charsize;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200447 state->match_all = 0;
448 state->must_advance = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000449
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000450 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000451
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000452 state->start = (void*) ((char*) ptr + start * state->charsize);
453 state->end = (void*) ((char*) ptr + end * state->charsize);
454
455 Py_INCREF(string);
456 state->string = string;
457 state->pos = start;
458 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000459
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000460 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600461 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300462 PyMem_Del(state->mark);
463 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600464 if (state->buffer.buf)
465 PyBuffer_Release(&state->buffer);
466 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000467}
468
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000469LOCAL(void)
470state_fini(SRE_STATE* state)
471{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600472 if (state->buffer.buf)
473 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000474 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000475 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300476 PyMem_Del(state->mark);
477 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000478}
479
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000480/* calculate offset from start of string */
481#define STATE_OFFSET(state, member)\
482 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
483
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000484LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300485getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300486 PyObject* string, Py_ssize_t start, Py_ssize_t end)
487{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300488 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300489 if (PyBytes_CheckExact(string) &&
490 start == 0 && end == PyBytes_GET_SIZE(string)) {
491 Py_INCREF(string);
492 return string;
493 }
494 return PyBytes_FromStringAndSize(
495 (const char *)ptr + start, end - start);
496 }
497 else {
498 return PyUnicode_Substring(string, start, end);
499 }
500}
501
502LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000503state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000504{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000505 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000506
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000507 index = (index - 1) * 2;
508
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000509 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000510 if (empty)
511 /* want empty string */
512 i = j = 0;
513 else {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200514 Py_RETURN_NONE;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000515 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000516 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000517 i = STATE_OFFSET(state, state->mark[index]);
518 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000519 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000520
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300521 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000522}
523
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000524static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100525pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000526{
527 switch (status) {
528 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400529 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000530 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400531 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000532 "maximum recursion limit exceeded"
533 );
534 break;
535 case SRE_ERROR_MEMORY:
536 PyErr_NoMemory();
537 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000538 case SRE_ERROR_INTERRUPTED:
539 /* An exception has already been raised, so let it fly */
540 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000541 default:
542 /* other error codes indicate compiler/engine bugs */
543 PyErr_SetString(
544 PyExc_RuntimeError,
545 "internal error in regular expression engine"
546 );
547 }
548}
549
Guido van Rossumb700df92000-03-31 14:59:30 +0000550static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000551pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000552{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000553 if (self->weakreflist != NULL)
554 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000555 Py_XDECREF(self->pattern);
556 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000557 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000558 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000559}
560
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300561LOCAL(Py_ssize_t)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200562sre_match(SRE_STATE* state, SRE_CODE* pattern)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300563{
564 if (state->charsize == 1)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200565 return sre_ucs1_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300566 if (state->charsize == 2)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200567 return sre_ucs2_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300568 assert(state->charsize == 4);
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200569 return sre_ucs4_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300570}
571
572LOCAL(Py_ssize_t)
573sre_search(SRE_STATE* state, SRE_CODE* pattern)
574{
575 if (state->charsize == 1)
576 return sre_ucs1_search(state, pattern);
577 if (state->charsize == 2)
578 return sre_ucs2_search(state, pattern);
579 assert(state->charsize == 4);
580 return sre_ucs4_search(state, pattern);
581}
582
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300583/*[clinic input]
584_sre.SRE_Pattern.match
585
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200586 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300587 pos: Py_ssize_t = 0
588 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300589
590Matches zero or more characters at the beginning of the string.
591[clinic start generated code]*/
592
Larry Hastings16c51912014-01-07 11:53:01 -0800593static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300594_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200595 Py_ssize_t pos, Py_ssize_t endpos)
596/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800597{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000598 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100599 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300600 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000601
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300602 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000603 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000604
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000605 state.ptr = state.start;
606
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000607 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
608
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200609 status = sre_match(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000610
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000611 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300612 if (PyErr_Occurred()) {
613 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000614 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300615 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000616
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300617 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000618 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300619 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000620}
621
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300622/*[clinic input]
623_sre.SRE_Pattern.fullmatch
624
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200625 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300626 pos: Py_ssize_t = 0
627 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300628
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300629Matches against all of the string.
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300630[clinic start generated code]*/
631
632static PyObject *
633_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200634 Py_ssize_t pos, Py_ssize_t endpos)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300635/*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200636{
637 SRE_STATE state;
638 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300639 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200640
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300641 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200642 return NULL;
643
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200644 state.ptr = state.start;
645
646 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
647
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200648 state.match_all = 1;
649 status = sre_match(&state, PatternObject_GetCode(self));
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200650
651 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300652 if (PyErr_Occurred()) {
653 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200654 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300655 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200656
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300657 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200658 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300659 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200660}
661
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300662/*[clinic input]
663_sre.SRE_Pattern.search
664
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200665 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300666 pos: Py_ssize_t = 0
667 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300668
669Scan through string looking for a match, and return a corresponding match object instance.
670
671Return None if no position in the string matches.
672[clinic start generated code]*/
673
674static PyObject *
675_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200676 Py_ssize_t pos, Py_ssize_t endpos)
677/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000678{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000679 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100680 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300681 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000682
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300683 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000684 return NULL;
685
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000686 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
687
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300688 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000689
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000690 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
691
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300692 if (PyErr_Occurred()) {
693 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000694 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300695 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000696
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300697 match = pattern_new_match(self, &state, status);
698 state_fini(&state);
699 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000700}
701
702static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200703call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000704{
705 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000706 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000707 PyObject* func;
708 PyObject* result;
709
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000710 if (!args)
711 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000712 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000713 if (!name)
714 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000715 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000716 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000717 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000718 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000719 func = PyObject_GetAttrString(mod, function);
720 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721 if (!func)
722 return NULL;
723 result = PyObject_CallObject(func, args);
724 Py_DECREF(func);
725 Py_DECREF(args);
726 return result;
727}
728
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300729/*[clinic input]
730_sre.SRE_Pattern.findall
731
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200732 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300733 pos: Py_ssize_t = 0
734 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300735
736Return a list of all non-overlapping matches of pattern in string.
737[clinic start generated code]*/
738
739static PyObject *
740_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200741 Py_ssize_t pos, Py_ssize_t endpos)
742/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000743{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000744 SRE_STATE state;
745 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100746 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000747 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000748
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300749 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000750 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000751
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000752 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000753 if (!list) {
754 state_fini(&state);
755 return NULL;
756 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000757
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000758 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000759
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000760 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000761
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000762 state_reset(&state);
763
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000764 state.ptr = state.start;
765
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300766 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300767 if (PyErr_Occurred())
768 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000769
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000770 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000771 if (status == 0)
772 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000773 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000774 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000775 }
Tim Peters3d563502006-01-21 02:47:53 +0000776
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000777 /* don't bother to build a match object */
778 switch (self->groups) {
779 case 0:
780 b = STATE_OFFSET(&state, state.start);
781 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300782 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300783 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000784 if (!item)
785 goto error;
786 break;
787 case 1:
788 item = state_getslice(&state, 1, string, 1);
789 if (!item)
790 goto error;
791 break;
792 default:
793 item = PyTuple_New(self->groups);
794 if (!item)
795 goto error;
796 for (i = 0; i < self->groups; i++) {
797 PyObject* o = state_getslice(&state, i+1, string, 1);
798 if (!o) {
799 Py_DECREF(item);
800 goto error;
801 }
802 PyTuple_SET_ITEM(item, i, o);
803 }
804 break;
805 }
806
807 status = PyList_Append(list, item);
808 Py_DECREF(item);
809 if (status < 0)
810 goto error;
811
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200812 state.must_advance = (state.ptr == state.start);
813 state.start = state.ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000814 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000815
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000816 state_fini(&state);
817 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000818
819error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000820 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000821 state_fini(&state);
822 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000823
Guido van Rossumb700df92000-03-31 14:59:30 +0000824}
825
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300826/*[clinic input]
827_sre.SRE_Pattern.finditer
828
829 string: object
830 pos: Py_ssize_t = 0
831 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
832
833Return an iterator over all non-overlapping matches for the RE pattern in string.
834
835For each match, the iterator returns a match object.
836[clinic start generated code]*/
837
838static PyObject *
839_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
840 Py_ssize_t pos, Py_ssize_t endpos)
841/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000842{
843 PyObject* scanner;
844 PyObject* search;
845 PyObject* iterator;
846
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300847 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000848 if (!scanner)
849 return NULL;
850
851 search = PyObject_GetAttrString(scanner, "search");
852 Py_DECREF(scanner);
853 if (!search)
854 return NULL;
855
856 iterator = PyCallIter_New(search, Py_None);
857 Py_DECREF(search);
858
859 return iterator;
860}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000861
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300862/*[clinic input]
863_sre.SRE_Pattern.scanner
864
865 string: object
866 pos: Py_ssize_t = 0
867 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
868
869[clinic start generated code]*/
870
871static PyObject *
872_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
873 Py_ssize_t pos, Py_ssize_t endpos)
874/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
875{
876 return pattern_scanner(self, string, pos, endpos);
877}
878
879/*[clinic input]
880_sre.SRE_Pattern.split
881
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200882 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300883 maxsplit: Py_ssize_t = 0
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300884
885Split string by the occurrences of pattern.
886[clinic start generated code]*/
887
888static PyObject *
889_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200890 Py_ssize_t maxsplit)
891/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000892{
893 SRE_STATE state;
894 PyObject* list;
895 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100896 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000897 Py_ssize_t n;
898 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000899 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000900
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200901 assert(self->codesize != 0);
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200902
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300903 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000904 return NULL;
905
906 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000907 if (!list) {
908 state_fini(&state);
909 return NULL;
910 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000911
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000912 n = 0;
913 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000914
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000915 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000916
917 state_reset(&state);
918
919 state.ptr = state.start;
920
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300921 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300922 if (PyErr_Occurred())
923 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000924
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000925 if (status <= 0) {
926 if (status == 0)
927 break;
928 pattern_error(status);
929 goto error;
930 }
Tim Peters3d563502006-01-21 02:47:53 +0000931
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000932 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300933 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000934 string, STATE_OFFSET(&state, last),
935 STATE_OFFSET(&state, state.start)
936 );
937 if (!item)
938 goto error;
939 status = PyList_Append(list, item);
940 Py_DECREF(item);
941 if (status < 0)
942 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000943
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000944 /* add groups (if any) */
945 for (i = 0; i < self->groups; i++) {
946 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000947 if (!item)
948 goto error;
949 status = PyList_Append(list, item);
950 Py_DECREF(item);
951 if (status < 0)
952 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000953 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000954
955 n = n + 1;
Serhiy Storchakafbb490f2018-01-04 11:06:13 +0200956 state.must_advance = (state.ptr == state.start);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000957 last = state.start = state.ptr;
958
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000959 }
960
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000961 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300962 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000963 string, STATE_OFFSET(&state, last), state.endpos
964 );
965 if (!item)
966 goto error;
967 status = PyList_Append(list, item);
968 Py_DECREF(item);
969 if (status < 0)
970 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000971
972 state_fini(&state);
973 return list;
974
975error:
976 Py_DECREF(list);
977 state_fini(&state);
978 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000979
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000980}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000981
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000982static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000983pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000984 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000985{
986 SRE_STATE state;
987 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300988 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000989 PyObject* item;
990 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000991 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000992 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100993 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000994 Py_ssize_t n;
995 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300996 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000997 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600998 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000999
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001000 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001001 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001002 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001003 Py_INCREF(filter);
1004 filter_is_callable = 1;
1005 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001006 /* if not callable, check if it's a literal string */
1007 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001008 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001009 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001010 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001011 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001012 if (charsize == 1)
1013 literal = memchr(ptr, '\\', n) == NULL;
1014 else
1015 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001016 } else {
1017 PyErr_Clear();
1018 literal = 0;
1019 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001020 if (view.buf)
1021 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001022 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001023 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001024 Py_INCREF(filter);
1025 filter_is_callable = 0;
1026 } else {
1027 /* not a literal; hand it over to the template compiler */
1028 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001029 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001030 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001031 );
1032 if (!filter)
1033 return NULL;
1034 filter_is_callable = PyCallable_Check(filter);
1035 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001036 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001037
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001038 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001039 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001040 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001041 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001042
1043 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001044 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001045 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001046 state_fini(&state);
1047 return NULL;
1048 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001049
1050 n = i = 0;
1051
1052 while (!count || n < count) {
1053
1054 state_reset(&state);
1055
1056 state.ptr = state.start;
1057
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001058 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001059 if (PyErr_Occurred())
1060 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001061
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001062 if (status <= 0) {
1063 if (status == 0)
1064 break;
1065 pattern_error(status);
1066 goto error;
1067 }
Tim Peters3d563502006-01-21 02:47:53 +00001068
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001069 b = STATE_OFFSET(&state, state.start);
1070 e = STATE_OFFSET(&state, state.ptr);
1071
1072 if (i < b) {
1073 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001074 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001075 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001076 if (!item)
1077 goto error;
1078 status = PyList_Append(list, item);
1079 Py_DECREF(item);
1080 if (status < 0)
1081 goto error;
1082
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001083 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001084
1085 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001086 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001087 match = pattern_new_match(self, &state, 1);
1088 if (!match)
1089 goto error;
Victor Stinner7bfb42d2016-12-05 17:04:32 +01001090 item = PyObject_CallFunctionObjArgs(filter, match, NULL);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001091 Py_DECREF(match);
1092 if (!item)
1093 goto error;
1094 } else {
1095 /* filter is literal string */
1096 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001097 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001098 }
1099
1100 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001101 if (item != Py_None) {
1102 status = PyList_Append(list, item);
1103 Py_DECREF(item);
1104 if (status < 0)
1105 goto error;
1106 }
Tim Peters3d563502006-01-21 02:47:53 +00001107
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001108 i = e;
1109 n = n + 1;
Serhiy Storchakafbb490f2018-01-04 11:06:13 +02001110 state.must_advance = (state.ptr == state.start);
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001111 state.start = state.ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001112 }
1113
1114 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001115 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001116 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001117 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001118 if (!item)
1119 goto error;
1120 status = PyList_Append(list, item);
1121 Py_DECREF(item);
1122 if (status < 0)
1123 goto error;
1124 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001125
1126 state_fini(&state);
1127
Guido van Rossum4e173842001-12-07 04:25:10 +00001128 Py_DECREF(filter);
1129
Fredrik Lundhdac58492001-10-21 21:48:30 +00001130 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001131 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001132 if (!joiner) {
1133 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001134 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001135 }
1136 if (PyList_GET_SIZE(list) == 0) {
1137 Py_DECREF(list);
1138 item = joiner;
1139 }
1140 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001141 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001142 item = _PyBytes_Join(joiner, list);
1143 else
1144 item = PyUnicode_Join(joiner, list);
1145 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001146 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001147 if (!item)
1148 return NULL;
1149 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001150
1151 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001152 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001153
1154 return item;
1155
1156error:
1157 Py_DECREF(list);
1158 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001159 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001160 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001161
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001162}
1163
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001164/*[clinic input]
1165_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001166
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001167 repl: object
1168 string: object
1169 count: Py_ssize_t = 0
1170
1171Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1172[clinic start generated code]*/
1173
1174static PyObject *
1175_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1176 PyObject *string, Py_ssize_t count)
1177/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1178{
1179 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001180}
1181
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001182/*[clinic input]
1183_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001184
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001185 repl: object
1186 string: object
1187 count: Py_ssize_t = 0
1188
1189Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1190[clinic start generated code]*/
1191
1192static PyObject *
1193_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1194 PyObject *string, Py_ssize_t count)
1195/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1196{
1197 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001198}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001199
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001200/*[clinic input]
1201_sre.SRE_Pattern.__copy__
1202
1203[clinic start generated code]*/
1204
1205static PyObject *
1206_sre_SRE_Pattern___copy___impl(PatternObject *self)
1207/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001208{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001209 Py_INCREF(self);
1210 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001211}
1212
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001213/*[clinic input]
1214_sre.SRE_Pattern.__deepcopy__
1215
1216 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001217 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001218
1219[clinic start generated code]*/
1220
1221static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001222_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1223/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001224{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001225 Py_INCREF(self);
1226 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001227}
1228
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001229static PyObject *
1230pattern_repr(PatternObject *obj)
1231{
1232 static const struct {
1233 const char *name;
1234 int value;
1235 } flag_names[] = {
1236 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1237 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1238 {"re.LOCALE", SRE_FLAG_LOCALE},
1239 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1240 {"re.DOTALL", SRE_FLAG_DOTALL},
1241 {"re.UNICODE", SRE_FLAG_UNICODE},
1242 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1243 {"re.DEBUG", SRE_FLAG_DEBUG},
1244 {"re.ASCII", SRE_FLAG_ASCII},
1245 };
1246 PyObject *result = NULL;
1247 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001248 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001249 int flags = obj->flags;
1250
1251 /* Omit re.UNICODE for valid string patterns. */
1252 if (obj->isbytes == 0 &&
1253 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1254 SRE_FLAG_UNICODE)
1255 flags &= ~SRE_FLAG_UNICODE;
1256
1257 flag_items = PyList_New(0);
1258 if (!flag_items)
1259 return NULL;
1260
1261 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1262 if (flags & flag_names[i].value) {
1263 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1264 if (!item)
1265 goto done;
1266
1267 if (PyList_Append(flag_items, item) < 0) {
1268 Py_DECREF(item);
1269 goto done;
1270 }
1271 Py_DECREF(item);
1272 flags &= ~flag_names[i].value;
1273 }
1274 }
1275 if (flags) {
1276 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1277 if (!item)
1278 goto done;
1279
1280 if (PyList_Append(flag_items, item) < 0) {
1281 Py_DECREF(item);
1282 goto done;
1283 }
1284 Py_DECREF(item);
1285 }
1286
1287 if (PyList_Size(flag_items) > 0) {
1288 PyObject *flags_result;
1289 PyObject *sep = PyUnicode_FromString("|");
1290 if (!sep)
1291 goto done;
1292 flags_result = PyUnicode_Join(sep, flag_items);
1293 Py_DECREF(sep);
1294 if (!flags_result)
1295 goto done;
1296 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1297 obj->pattern, flags_result);
1298 Py_DECREF(flags_result);
1299 }
1300 else {
1301 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1302 }
1303
1304done:
1305 Py_DECREF(flag_items);
1306 return result;
1307}
1308
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03001309PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001310
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001311/* PatternObject's 'groupindex' method. */
1312static PyObject *
1313pattern_groupindex(PatternObject *self)
1314{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001315 if (self->groupindex == NULL)
1316 return PyDict_New();
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001317 return PyDictProxy_New(self->groupindex);
1318}
1319
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001320static int _validate(PatternObject *self); /* Forward */
1321
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001322/*[clinic input]
1323_sre.compile
1324
1325 pattern: object
1326 flags: int
1327 code: object(subclass_of='&PyList_Type')
1328 groups: Py_ssize_t
Victor Stinner726a57d2016-11-22 23:04:39 +01001329 groupindex: object(subclass_of='&PyDict_Type')
1330 indexgroup: object(subclass_of='&PyTuple_Type')
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001331
1332[clinic start generated code]*/
1333
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001334static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001335_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001336 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1337 PyObject *indexgroup)
Victor Stinner726a57d2016-11-22 23:04:39 +01001338/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001339{
1340 /* "compile" pattern descriptor to pattern object */
1341
1342 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001343 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001344
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001345 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001346 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001347 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1348 if (!self)
1349 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001350 self->weakreflist = NULL;
1351 self->pattern = NULL;
1352 self->groupindex = NULL;
1353 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001354
1355 self->codesize = n;
1356
1357 for (i = 0; i < n; i++) {
1358 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001359 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001360 self->code[i] = (SRE_CODE) value;
1361 if ((unsigned long) self->code[i] != value) {
1362 PyErr_SetString(PyExc_OverflowError,
1363 "regular expression code size limit exceeded");
1364 break;
1365 }
1366 }
1367
1368 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001369 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001370 return NULL;
1371 }
1372
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001373 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001374 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001375 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376 else {
1377 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001378 int charsize;
1379 Py_buffer view;
1380 view.buf = NULL;
1381 if (!getstring(pattern, &p_length, &self->isbytes,
1382 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001383 Py_DECREF(self);
1384 return NULL;
1385 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001386 if (view.buf)
1387 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001388 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001389
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001390 Py_INCREF(pattern);
1391 self->pattern = pattern;
1392
1393 self->flags = flags;
1394
1395 self->groups = groups;
1396
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001397 if (PyDict_GET_SIZE(groupindex) > 0) {
1398 Py_INCREF(groupindex);
1399 self->groupindex = groupindex;
1400 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1401 Py_INCREF(indexgroup);
1402 self->indexgroup = indexgroup;
1403 }
1404 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001405
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001406 if (!_validate(self)) {
1407 Py_DECREF(self);
1408 return NULL;
1409 }
1410
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001411 return (PyObject*) self;
1412}
1413
Guido van Rossumb700df92000-03-31 14:59:30 +00001414/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001415/* Code validation */
1416
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1438
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf() argument list, e.g. VTRACE(("x=%d\n", x)); it expands to nothing
   unless VVERBOSE is defined. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the *enclosing function* return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on local variables of the function that uses them:
   'code' (read cursor, advanced by one word), 'end' (one past the last
   readable word) and one of 'op', 'arg' or 'skip' (receives the word).
   Each of them FAILs instead of reading at or beyond 'end'. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Like GET_ARG, but additionally checks that the skip count, reduced by
   'adj', does not reach past 'end' (measured from the skip word itself).
   Because the subtraction is unsigned, a skip smaller than 'adj' wraps to a
   huge value and is rejected as well.  'adj' is parenthesized so that an
   expression argument cannot change the meaning of the comparison. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001479
1480static int
1481_validate_charset(SRE_CODE *code, SRE_CODE *end)
1482{
1483 /* Some variables are manipulated by the macros above */
1484 SRE_CODE op;
1485 SRE_CODE arg;
1486 SRE_CODE offset;
1487 int i;
1488
1489 while (code < end) {
1490 GET_OP;
1491 switch (op) {
1492
1493 case SRE_OP_NEGATE:
1494 break;
1495
1496 case SRE_OP_LITERAL:
1497 GET_ARG;
1498 break;
1499
1500 case SRE_OP_RANGE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001501 case SRE_OP_RANGE_UNI_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001502 GET_ARG;
1503 GET_ARG;
1504 break;
1505
1506 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001507 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001508 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001509 FAIL;
1510 code += offset;
1511 break;
1512
1513 case SRE_OP_BIGCHARSET:
1514 GET_ARG; /* Number of blocks */
1515 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001516 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001517 FAIL;
1518 /* Make sure that each byte points to a valid block */
1519 for (i = 0; i < 256; i++) {
1520 if (((unsigned char *)code)[i] >= arg)
1521 FAIL;
1522 }
1523 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001524 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001525 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001526 FAIL;
1527 code += offset;
1528 break;
1529
1530 case SRE_OP_CATEGORY:
1531 GET_ARG;
1532 switch (arg) {
1533 case SRE_CATEGORY_DIGIT:
1534 case SRE_CATEGORY_NOT_DIGIT:
1535 case SRE_CATEGORY_SPACE:
1536 case SRE_CATEGORY_NOT_SPACE:
1537 case SRE_CATEGORY_WORD:
1538 case SRE_CATEGORY_NOT_WORD:
1539 case SRE_CATEGORY_LINEBREAK:
1540 case SRE_CATEGORY_NOT_LINEBREAK:
1541 case SRE_CATEGORY_LOC_WORD:
1542 case SRE_CATEGORY_LOC_NOT_WORD:
1543 case SRE_CATEGORY_UNI_DIGIT:
1544 case SRE_CATEGORY_UNI_NOT_DIGIT:
1545 case SRE_CATEGORY_UNI_SPACE:
1546 case SRE_CATEGORY_UNI_NOT_SPACE:
1547 case SRE_CATEGORY_UNI_WORD:
1548 case SRE_CATEGORY_UNI_NOT_WORD:
1549 case SRE_CATEGORY_UNI_LINEBREAK:
1550 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1551 break;
1552 default:
1553 FAIL;
1554 }
1555 break;
1556
1557 default:
1558 FAIL;
1559
1560 }
1561 }
1562
1563 return 1;
1564}
1565
1566static int
1567_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1568{
1569 /* Some variables are manipulated by the macros above */
1570 SRE_CODE op;
1571 SRE_CODE arg;
1572 SRE_CODE skip;
1573
1574 VTRACE(("code=%p, end=%p\n", code, end));
1575
1576 if (code > end)
1577 FAIL;
1578
1579 while (code < end) {
1580 GET_OP;
1581 switch (op) {
1582
1583 case SRE_OP_MARK:
1584 /* We don't check whether marks are properly nested; the
1585 sre_match() code is robust even if they don't, and the worst
1586 you can get is nonsensical match results. */
1587 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001588 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001589 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1590 FAIL;
1591 }
1592 break;
1593
1594 case SRE_OP_LITERAL:
1595 case SRE_OP_NOT_LITERAL:
1596 case SRE_OP_LITERAL_IGNORE:
1597 case SRE_OP_NOT_LITERAL_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001598 case SRE_OP_LITERAL_UNI_IGNORE:
1599 case SRE_OP_NOT_LITERAL_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001600 case SRE_OP_LITERAL_LOC_IGNORE:
1601 case SRE_OP_NOT_LITERAL_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001602 GET_ARG;
1603 /* The arg is just a character, nothing to check */
1604 break;
1605
1606 case SRE_OP_SUCCESS:
1607 case SRE_OP_FAILURE:
1608 /* Nothing to check; these normally end the matching process */
1609 break;
1610
1611 case SRE_OP_AT:
1612 GET_ARG;
1613 switch (arg) {
1614 case SRE_AT_BEGINNING:
1615 case SRE_AT_BEGINNING_STRING:
1616 case SRE_AT_BEGINNING_LINE:
1617 case SRE_AT_END:
1618 case SRE_AT_END_LINE:
1619 case SRE_AT_END_STRING:
1620 case SRE_AT_BOUNDARY:
1621 case SRE_AT_NON_BOUNDARY:
1622 case SRE_AT_LOC_BOUNDARY:
1623 case SRE_AT_LOC_NON_BOUNDARY:
1624 case SRE_AT_UNI_BOUNDARY:
1625 case SRE_AT_UNI_NON_BOUNDARY:
1626 break;
1627 default:
1628 FAIL;
1629 }
1630 break;
1631
1632 case SRE_OP_ANY:
1633 case SRE_OP_ANY_ALL:
1634 /* These have no operands */
1635 break;
1636
1637 case SRE_OP_IN:
1638 case SRE_OP_IN_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001639 case SRE_OP_IN_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001640 case SRE_OP_IN_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001641 GET_SKIP;
1642 /* Stop 1 before the end; we check the FAILURE below */
1643 if (!_validate_charset(code, code+skip-2))
1644 FAIL;
1645 if (code[skip-2] != SRE_OP_FAILURE)
1646 FAIL;
1647 code += skip-1;
1648 break;
1649
1650 case SRE_OP_INFO:
1651 {
1652 /* A minimal info field is
1653 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1654 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1655 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001656 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001657 SRE_CODE *newcode;
1658 GET_SKIP;
1659 newcode = code+skip-1;
1660 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001661 GET_ARG;
1662 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001663 /* Check that only valid flags are present */
1664 if ((flags & ~(SRE_INFO_PREFIX |
1665 SRE_INFO_LITERAL |
1666 SRE_INFO_CHARSET)) != 0)
1667 FAIL;
1668 /* PREFIX and CHARSET are mutually exclusive */
1669 if ((flags & SRE_INFO_PREFIX) &&
1670 (flags & SRE_INFO_CHARSET))
1671 FAIL;
1672 /* LITERAL implies PREFIX */
1673 if ((flags & SRE_INFO_LITERAL) &&
1674 !(flags & SRE_INFO_PREFIX))
1675 FAIL;
1676 /* Validate the prefix */
1677 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001678 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001679 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001680 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001681 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001682 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001683 FAIL;
1684 code += prefix_len;
1685 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001686 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001687 FAIL;
1688 /* Each overlap value should be < prefix_len */
1689 for (i = 0; i < prefix_len; i++) {
1690 if (code[i] >= prefix_len)
1691 FAIL;
1692 }
1693 code += prefix_len;
1694 }
1695 /* Validate the charset */
1696 if (flags & SRE_INFO_CHARSET) {
1697 if (!_validate_charset(code, newcode-1))
1698 FAIL;
1699 if (newcode[-1] != SRE_OP_FAILURE)
1700 FAIL;
1701 code = newcode;
1702 }
1703 else if (code != newcode) {
1704 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1705 FAIL;
1706 }
1707 }
1708 break;
1709
1710 case SRE_OP_BRANCH:
1711 {
1712 SRE_CODE *target = NULL;
1713 for (;;) {
1714 GET_SKIP;
1715 if (skip == 0)
1716 break;
1717 /* Stop 2 before the end; we check the JUMP below */
1718 if (!_validate_inner(code, code+skip-3, groups))
1719 FAIL;
1720 code += skip-3;
1721 /* Check that it ends with a JUMP, and that each JUMP
1722 has the same target */
1723 GET_OP;
1724 if (op != SRE_OP_JUMP)
1725 FAIL;
1726 GET_SKIP;
1727 if (target == NULL)
1728 target = code+skip-1;
1729 else if (code+skip-1 != target)
1730 FAIL;
1731 }
1732 }
1733 break;
1734
1735 case SRE_OP_REPEAT_ONE:
1736 case SRE_OP_MIN_REPEAT_ONE:
1737 {
1738 SRE_CODE min, max;
1739 GET_SKIP;
1740 GET_ARG; min = arg;
1741 GET_ARG; max = arg;
1742 if (min > max)
1743 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001744 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001745 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001746 if (!_validate_inner(code, code+skip-4, groups))
1747 FAIL;
1748 code += skip-4;
1749 GET_OP;
1750 if (op != SRE_OP_SUCCESS)
1751 FAIL;
1752 }
1753 break;
1754
1755 case SRE_OP_REPEAT:
1756 {
1757 SRE_CODE min, max;
1758 GET_SKIP;
1759 GET_ARG; min = arg;
1760 GET_ARG; max = arg;
1761 if (min > max)
1762 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001763 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001764 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001765 if (!_validate_inner(code, code+skip-3, groups))
1766 FAIL;
1767 code += skip-3;
1768 GET_OP;
1769 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1770 FAIL;
1771 }
1772 break;
1773
1774 case SRE_OP_GROUPREF:
1775 case SRE_OP_GROUPREF_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001776 case SRE_OP_GROUPREF_UNI_IGNORE:
1777 case SRE_OP_GROUPREF_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001778 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001779 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001780 FAIL;
1781 break;
1782
1783 case SRE_OP_GROUPREF_EXISTS:
1784 /* The regex syntax for this is: '(?(group)then|else)', where
1785 'group' is either an integer group number or a group name,
1786 'then' and 'else' are sub-regexes, and 'else' is optional. */
1787 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001788 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001789 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001790 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001791 code--; /* The skip is relative to the first arg! */
1792 /* There are two possibilities here: if there is both a 'then'
1793 part and an 'else' part, the generated code looks like:
1794
1795 GROUPREF_EXISTS
1796 <group>
1797 <skipyes>
1798 ...then part...
1799 JUMP
1800 <skipno>
1801 (<skipyes> jumps here)
1802 ...else part...
1803 (<skipno> jumps here)
1804
1805 If there is only a 'then' part, it looks like:
1806
1807 GROUPREF_EXISTS
1808 <group>
1809 <skip>
1810 ...then part...
1811 (<skip> jumps here)
1812
1813 There is no direct way to decide which it is, and we don't want
1814 to allow arbitrary jumps anywhere in the code; so we just look
1815 for a JUMP opcode preceding our skip target.
1816 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001817 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001818 code[skip-3] == SRE_OP_JUMP)
1819 {
1820 VTRACE(("both then and else parts present\n"));
1821 if (!_validate_inner(code+1, code+skip-3, groups))
1822 FAIL;
1823 code += skip-2; /* Position after JUMP, at <skipno> */
1824 GET_SKIP;
1825 if (!_validate_inner(code, code+skip-1, groups))
1826 FAIL;
1827 code += skip-1;
1828 }
1829 else {
1830 VTRACE(("only a then part present\n"));
1831 if (!_validate_inner(code+1, code+skip-1, groups))
1832 FAIL;
1833 code += skip-1;
1834 }
1835 break;
1836
1837 case SRE_OP_ASSERT:
1838 case SRE_OP_ASSERT_NOT:
1839 GET_SKIP;
1840 GET_ARG; /* 0 for lookahead, width for lookbehind */
1841 code--; /* Back up over arg to simplify math below */
1842 if (arg & 0x80000000)
1843 FAIL; /* Width too large */
1844 /* Stop 1 before the end; we check the SUCCESS below */
1845 if (!_validate_inner(code+1, code+skip-2, groups))
1846 FAIL;
1847 code += skip-2;
1848 GET_OP;
1849 if (op != SRE_OP_SUCCESS)
1850 FAIL;
1851 break;
1852
1853 default:
1854 FAIL;
1855
1856 }
1857 }
1858
1859 VTRACE(("okay\n"));
1860 return 1;
1861}
1862
1863static int
1864_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1865{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001866 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1867 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001868 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001869 return _validate_inner(code, end-1, groups);
1870}
1871
1872static int
1873_validate(PatternObject *self)
1874{
1875 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1876 {
1877 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1878 return 0;
1879 }
1880 else
1881 VTRACE(("Success!\n"));
1882 return 1;
1883}
1884
1885/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001886/* match methods */
1887
1888static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001889match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001890{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001891 Py_XDECREF(self->regs);
1892 Py_XDECREF(self->string);
1893 Py_DECREF(self->pattern);
1894 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001895}
1896
1897static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001898match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001899{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001900 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001901 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001902 Py_buffer view;
1903 PyObject *result;
1904 void* ptr;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001905 Py_ssize_t i, j;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001906
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001907 if (index < 0 || index >= self->groups) {
1908 /* raise IndexError if we were given a bad group number */
1909 PyErr_SetString(
1910 PyExc_IndexError,
1911 "no such group"
1912 );
1913 return NULL;
1914 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001915
Fredrik Lundh6f013982000-07-03 18:44:21 +00001916 index *= 2;
1917
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001918 if (self->string == Py_None || self->mark[index] < 0) {
1919 /* return default value if the string or group is undefined */
1920 Py_INCREF(def);
1921 return def;
1922 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001923
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001924 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001925 if (ptr == NULL)
1926 return NULL;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001927
1928 i = self->mark[index];
1929 j = self->mark[index+1];
1930 i = Py_MIN(i, length);
1931 j = Py_MIN(j, length);
1932 result = getslice(isbytes, ptr, self->string, i, j);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001933 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001934 PyBuffer_Release(&view);
1935 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001936}
1937
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001938static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001939match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001940{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001941 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001942
Guido van Rossumddefaf32007-01-14 03:31:43 +00001943 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001944 /* Default value */
1945 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001946
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001947 if (PyIndex_Check(index)) {
1948 return PyNumber_AsSsize_t(index, NULL);
1949 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001950
Fredrik Lundh6f013982000-07-03 18:44:21 +00001951 i = -1;
1952
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001953 if (self->pattern->groupindex) {
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001954 index = PyDict_GetItem(self->pattern->groupindex, index);
1955 if (index && PyLong_Check(index)) {
1956 i = PyLong_AsSsize_t(index);
1957 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001958 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001959
1960 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001961}
1962
1963static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001964match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001965{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001966 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001967}
1968
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001969/*[clinic input]
1970_sre.SRE_Match.expand
1971
1972 template: object
1973
1974Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1975[clinic start generated code]*/
1976
1977static PyObject *
1978_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1979/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001980{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001981 /* delegate to Python code */
1982 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001983 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001984 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001985 );
1986}
1987
1988static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001989match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001990{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001991 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001992 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001993
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001994 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001995
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001996 switch (size) {
1997 case 0:
Serhiy Storchakaba85d692017-03-30 09:09:41 +03001998 result = match_getslice(self, _PyLong_Zero, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001999 break;
2000 case 1:
2001 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2002 break;
2003 default:
2004 /* fetch multiple items */
2005 result = PyTuple_New(size);
2006 if (!result)
2007 return NULL;
2008 for (i = 0; i < size; i++) {
2009 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002010 self, PyTuple_GET_ITEM(args, i), Py_None
2011 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002012 if (!item) {
2013 Py_DECREF(result);
2014 return NULL;
2015 }
2016 PyTuple_SET_ITEM(result, i, item);
2017 }
2018 break;
2019 }
2020 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002021}
2022
Eric V. Smith605bdae2016-09-11 08:55:43 -04002023static PyObject*
2024match_getitem(MatchObject* self, PyObject* name)
2025{
2026 return match_getslice(self, name, Py_None);
2027}
2028
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002029/*[clinic input]
2030_sre.SRE_Match.groups
2031
2032 default: object = None
2033 Is used for groups that did not participate in the match.
2034
2035Return a tuple containing all the subgroups of the match, from 1.
2036[clinic start generated code]*/
2037
2038static PyObject *
2039_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2040/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002041{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002042 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002043 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002044
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002045 result = PyTuple_New(self->groups-1);
2046 if (!result)
2047 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002048
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002049 for (index = 1; index < self->groups; index++) {
2050 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002051 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002052 if (!item) {
2053 Py_DECREF(result);
2054 return NULL;
2055 }
2056 PyTuple_SET_ITEM(result, index-1, item);
2057 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002058
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002059 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002060}
2061
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002062/*[clinic input]
2063_sre.SRE_Match.groupdict
2064
2065 default: object = None
2066 Is used for groups that did not participate in the match.
2067
2068Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2069[clinic start generated code]*/
2070
2071static PyObject *
2072_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2073/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002074{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002075 PyObject *result;
2076 PyObject *key;
2077 PyObject *value;
2078 Py_ssize_t pos = 0;
2079 Py_hash_t hash;
Guido van Rossumb700df92000-03-31 14:59:30 +00002080
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002081 result = PyDict_New();
2082 if (!result || !self->pattern->groupindex)
2083 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002084
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002085 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002086 int status;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002087 Py_INCREF(key);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002088 value = match_getslice(self, key, default_value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002089 if (!value) {
2090 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002091 goto failed;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002092 }
2093 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002094 Py_DECREF(value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002095 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002096 if (status < 0)
2097 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002098 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002099
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002100 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002101
2102failed:
Fredrik Lundh770617b2001-01-14 15:06:11 +00002103 Py_DECREF(result);
2104 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002105}
2106
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002107/*[clinic input]
2108_sre.SRE_Match.start -> Py_ssize_t
2109
2110 group: object(c_default="NULL") = 0
2111 /
2112
2113Return index of the start of the substring matched by group.
2114[clinic start generated code]*/
2115
2116static Py_ssize_t
2117_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2118/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002119{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002120 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002121
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002122 if (index < 0 || index >= self->groups) {
2123 PyErr_SetString(
2124 PyExc_IndexError,
2125 "no such group"
2126 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002127 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002128 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002129
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002130 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002131 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002132}
2133
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002134/*[clinic input]
2135_sre.SRE_Match.end -> Py_ssize_t
2136
2137 group: object(c_default="NULL") = 0
2138 /
2139
2140Return index of the end of the substring matched by group.
2141[clinic start generated code]*/
2142
2143static Py_ssize_t
2144_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2145/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002146{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002147 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002148
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002149 if (index < 0 || index >= self->groups) {
2150 PyErr_SetString(
2151 PyExc_IndexError,
2152 "no such group"
2153 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002154 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002155 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002156
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002157 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002158 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002159}
2160
2161LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002162_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002163{
2164 PyObject* pair;
2165 PyObject* item;
2166
2167 pair = PyTuple_New(2);
2168 if (!pair)
2169 return NULL;
2170
Christian Heimes217cfd12007-12-02 14:31:20 +00002171 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002172 if (!item)
2173 goto error;
2174 PyTuple_SET_ITEM(pair, 0, item);
2175
Christian Heimes217cfd12007-12-02 14:31:20 +00002176 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002177 if (!item)
2178 goto error;
2179 PyTuple_SET_ITEM(pair, 1, item);
2180
2181 return pair;
2182
2183 error:
2184 Py_DECREF(pair);
2185 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002186}
2187
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002188/*[clinic input]
2189_sre.SRE_Match.span
2190
2191 group: object(c_default="NULL") = 0
2192 /
2193
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002194For match object m, return the 2-tuple (m.start(group), m.end(group)).
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002195[clinic start generated code]*/
2196
2197static PyObject *
2198_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002199/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002200{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002201 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002202
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002203 if (index < 0 || index >= self->groups) {
2204 PyErr_SetString(
2205 PyExc_IndexError,
2206 "no such group"
2207 );
2208 return NULL;
2209 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002210
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002211 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002212 return _pair(self->mark[index*2], self->mark[index*2+1]);
2213}
2214
2215static PyObject*
2216match_regs(MatchObject* self)
2217{
2218 PyObject* regs;
2219 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002220 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002221
2222 regs = PyTuple_New(self->groups);
2223 if (!regs)
2224 return NULL;
2225
2226 for (index = 0; index < self->groups; index++) {
2227 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2228 if (!item) {
2229 Py_DECREF(regs);
2230 return NULL;
2231 }
2232 PyTuple_SET_ITEM(regs, index, item);
2233 }
2234
2235 Py_INCREF(regs);
2236 self->regs = regs;
2237
2238 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002239}
2240
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002241/*[clinic input]
2242_sre.SRE_Match.__copy__
2243
2244[clinic start generated code]*/
2245
2246static PyObject *
2247_sre_SRE_Match___copy___impl(MatchObject *self)
2248/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002249{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002250 Py_INCREF(self);
2251 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002252}
2253
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002254/*[clinic input]
2255_sre.SRE_Match.__deepcopy__
2256
2257 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002258 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002259
2260[clinic start generated code]*/
2261
2262static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002263_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2264/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002265{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002266 Py_INCREF(self);
2267 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002268}
2269
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002270PyDoc_STRVAR(match_doc,
2271"The result of re.match() and re.search().\n\
2272Match objects always have a boolean value of True.");
2273
2274PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002275"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002276 Return subgroup(s) of the match by indices or names.\n\
2277 For 0 returns the entire match.");
2278
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002279static PyObject *
2280match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002281{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002282 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002283 return PyLong_FromSsize_t(self->lastindex);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002284 Py_RETURN_NONE;
Guido van Rossumb700df92000-03-31 14:59:30 +00002285}
2286
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002287static PyObject *
2288match_lastgroup_get(MatchObject *self)
2289{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002290 if (self->pattern->indexgroup &&
2291 self->lastindex >= 0 &&
2292 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2293 {
2294 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2295 self->lastindex);
2296 Py_INCREF(result);
2297 return result;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002298 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002299 Py_RETURN_NONE;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002300}
2301
2302static PyObject *
2303match_regs_get(MatchObject *self)
2304{
2305 if (self->regs) {
2306 Py_INCREF(self->regs);
2307 return self->regs;
2308 } else
2309 return match_regs(self);
2310}
2311
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002312static PyObject *
2313match_repr(MatchObject *self)
2314{
2315 PyObject *result;
2316 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2317 if (group0 == NULL)
2318 return NULL;
2319 result = PyUnicode_FromFormat(
2320 "<%s object; span=(%d, %d), match=%.50R>",
2321 Py_TYPE(self)->tp_name,
2322 self->mark[0], self->mark[1], group0);
2323 Py_DECREF(group0);
2324 return result;
2325}
2326
2327
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002328static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002329pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002330{
2331 /* create match object (from state object) */
2332
2333 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002334 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002335 char* base;
2336 int n;
2337
2338 if (status > 0) {
2339
2340 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002341 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002342 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2343 2*(pattern->groups+1));
2344 if (!match)
2345 return NULL;
2346
2347 Py_INCREF(pattern);
2348 match->pattern = pattern;
2349
2350 Py_INCREF(state->string);
2351 match->string = state->string;
2352
2353 match->regs = NULL;
2354 match->groups = pattern->groups+1;
2355
2356 /* fill in group slices */
2357
2358 base = (char*) state->beginning;
2359 n = state->charsize;
2360
2361 match->mark[0] = ((char*) state->start - base) / n;
2362 match->mark[1] = ((char*) state->ptr - base) / n;
2363
2364 for (i = j = 0; i < pattern->groups; i++, j+=2)
2365 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2366 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2367 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2368 } else
2369 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2370
2371 match->pos = state->pos;
2372 match->endpos = state->endpos;
2373
2374 match->lastindex = state->lastindex;
2375
2376 return (PyObject*) match;
2377
2378 } else if (status == 0) {
2379
2380 /* no match */
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002381 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002382
2383 }
2384
2385 /* internal error */
2386 pattern_error(status);
2387 return NULL;
2388}
2389
2390
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002391/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002392/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002393
2394static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002395scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002396{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002397 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002398 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002399 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002400}
2401
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002402/*[clinic input]
2403_sre.SRE_Scanner.match
2404
2405[clinic start generated code]*/
2406
2407static PyObject *
2408_sre_SRE_Scanner_match_impl(ScannerObject *self)
2409/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002410{
2411 SRE_STATE* state = &self->state;
2412 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002413 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002414
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002415 if (state->start == NULL)
2416 Py_RETURN_NONE;
2417
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002418 state_reset(state);
2419
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002420 state->ptr = state->start;
2421
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002422 status = sre_match(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002423 if (PyErr_Occurred())
2424 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002425
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002426 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002427 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002428
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002429 if (status == 0)
2430 state->start = NULL;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002431 else {
2432 state->must_advance = (state->ptr == state->start);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002433 state->start = state->ptr;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002434 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002435
2436 return match;
2437}
2438
2439
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002440/*[clinic input]
2441_sre.SRE_Scanner.search
2442
2443[clinic start generated code]*/
2444
2445static PyObject *
2446_sre_SRE_Scanner_search_impl(ScannerObject *self)
2447/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002448{
2449 SRE_STATE* state = &self->state;
2450 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002451 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002452
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002453 if (state->start == NULL)
2454 Py_RETURN_NONE;
2455
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002456 state_reset(state);
2457
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002458 state->ptr = state->start;
2459
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002460 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002461 if (PyErr_Occurred())
2462 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002463
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002464 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002465 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002466
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002467 if (status == 0)
2468 state->start = NULL;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002469 else {
2470 state->must_advance = (state->ptr == state->start);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002471 state->start = state->ptr;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002472 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002473
2474 return match;
2475}
2476
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002477static PyObject *
2478pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002479{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002480 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002481
2482 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002483 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2484 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002485 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002486 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002487
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002488 /* create search state object */
2489 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2490 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002491 return NULL;
2492 }
2493
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002494 Py_INCREF(self);
2495 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002496
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002497 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002498}
2499
Victor Stinnerb44fb122016-11-21 16:35:08 +01002500static Py_hash_t
2501pattern_hash(PatternObject *self)
2502{
2503 Py_hash_t hash, hash2;
2504
2505 hash = PyObject_Hash(self->pattern);
2506 if (hash == -1) {
2507 return -1;
2508 }
2509
2510 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2511 hash ^= hash2;
2512
2513 hash ^= self->flags;
2514 hash ^= self->isbytes;
2515 hash ^= self->codesize;
2516
2517 if (hash == -1) {
2518 hash = -2;
2519 }
2520 return hash;
2521}
2522
2523static PyObject*
2524pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2525{
2526 PatternObject *left, *right;
2527 int cmp;
2528
2529 if (op != Py_EQ && op != Py_NE) {
2530 Py_RETURN_NOTIMPLEMENTED;
2531 }
2532
2533 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2534 Py_RETURN_NOTIMPLEMENTED;
2535 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002536
2537 if (lefto == righto) {
2538 /* a pattern is equal to itself */
2539 return PyBool_FromLong(op == Py_EQ);
2540 }
2541
Victor Stinnerb44fb122016-11-21 16:35:08 +01002542 left = (PatternObject *)lefto;
2543 right = (PatternObject *)righto;
2544
2545 cmp = (left->flags == right->flags
2546 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002547 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002548 if (cmp) {
2549 /* Compare the code and the pattern because the same pattern can
2550 produce different codes depending on the locale used to compile the
2551 pattern when the re.LOCALE flag is used. Don't compare groups,
2552 indexgroup nor groupindex: they are derivated from the pattern. */
2553 cmp = (memcmp(left->code, right->code,
2554 sizeof(left->code[0]) * left->codesize) == 0);
2555 }
2556 if (cmp) {
2557 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2558 Py_EQ);
2559 if (cmp < 0) {
2560 return NULL;
2561 }
2562 }
2563 if (op == Py_NE) {
2564 cmp = !cmp;
2565 }
2566 return PyBool_FromLong(cmp);
2567}
2568
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002569#include "clinic/_sre.c.h"
2570
2571static PyMethodDef pattern_methods[] = {
2572 _SRE_SRE_PATTERN_MATCH_METHODDEF
2573 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2574 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2575 _SRE_SRE_PATTERN_SUB_METHODDEF
2576 _SRE_SRE_PATTERN_SUBN_METHODDEF
2577 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2578 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2579 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2580 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2581 _SRE_SRE_PATTERN___COPY___METHODDEF
2582 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2583 {NULL, NULL}
2584};
2585
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002586static PyGetSetDef pattern_getset[] = {
2587 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2588 "A dictionary mapping group names to group numbers."},
2589 {NULL} /* Sentinel */
2590};
2591
2592#define PAT_OFF(x) offsetof(PatternObject, x)
2593static PyMemberDef pattern_members[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002594 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
2595 "The pattern string from which the RE object was compiled."},
2596 {"flags", T_INT, PAT_OFF(flags), READONLY,
2597 "The regex matching flags."},
2598 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
2599 "The number of capturing groups in the pattern."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002600 {NULL} /* Sentinel */
2601};
2602
2603static PyTypeObject Pattern_Type = {
2604 PyVarObject_HEAD_INIT(NULL, 0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002605 "re.Pattern",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002606 sizeof(PatternObject), sizeof(SRE_CODE),
2607 (destructor)pattern_dealloc, /* tp_dealloc */
2608 0, /* tp_print */
2609 0, /* tp_getattr */
2610 0, /* tp_setattr */
2611 0, /* tp_reserved */
2612 (reprfunc)pattern_repr, /* tp_repr */
2613 0, /* tp_as_number */
2614 0, /* tp_as_sequence */
2615 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002616 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002617 0, /* tp_call */
2618 0, /* tp_str */
2619 0, /* tp_getattro */
2620 0, /* tp_setattro */
2621 0, /* tp_as_buffer */
2622 Py_TPFLAGS_DEFAULT, /* tp_flags */
2623 pattern_doc, /* tp_doc */
2624 0, /* tp_traverse */
2625 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002626 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002627 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2628 0, /* tp_iter */
2629 0, /* tp_iternext */
2630 pattern_methods, /* tp_methods */
2631 pattern_members, /* tp_members */
2632 pattern_getset, /* tp_getset */
2633};
2634
Eric V. Smith605bdae2016-09-11 08:55:43 -04002635/* Match objects do not support length or assignment, but do support
2636 __getitem__. */
2637static PyMappingMethods match_as_mapping = {
2638 NULL,
2639 (binaryfunc)match_getitem,
2640 NULL
2641};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002642
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002643static PyMethodDef match_methods[] = {
2644 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2645 _SRE_SRE_MATCH_START_METHODDEF
2646 _SRE_SRE_MATCH_END_METHODDEF
2647 _SRE_SRE_MATCH_SPAN_METHODDEF
2648 _SRE_SRE_MATCH_GROUPS_METHODDEF
2649 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2650 _SRE_SRE_MATCH_EXPAND_METHODDEF
2651 _SRE_SRE_MATCH___COPY___METHODDEF
2652 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2653 {NULL, NULL}
2654};
2655
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002656static PyGetSetDef match_getset[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002657 {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2658 "The integer index of the last matched capturing group."},
2659 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2660 "The name of the last matched capturing group."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002661 {"regs", (getter)match_regs_get, (setter)NULL},
2662 {NULL}
2663};
2664
2665#define MATCH_OFF(x) offsetof(MatchObject, x)
2666static PyMemberDef match_members[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002667 {"string", T_OBJECT, MATCH_OFF(string), READONLY,
2668 "The string passed to match() or search()."},
2669 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY,
2670 "The regular expression object."},
2671 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
2672 "The index into the string at which the RE engine started looking for a match."},
2673 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
2674 "The index into the string beyond which the RE engine will not go."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002675 {NULL}
2676};
2677
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2680
2681static PyTypeObject Match_Type = {
2682 PyVarObject_HEAD_INIT(NULL,0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002683 "re.Match",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002684 sizeof(MatchObject), sizeof(Py_ssize_t),
2685 (destructor)match_dealloc, /* tp_dealloc */
2686 0, /* tp_print */
2687 0, /* tp_getattr */
2688 0, /* tp_setattr */
2689 0, /* tp_reserved */
2690 (reprfunc)match_repr, /* tp_repr */
2691 0, /* tp_as_number */
2692 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002693 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002694 0, /* tp_hash */
2695 0, /* tp_call */
2696 0, /* tp_str */
2697 0, /* tp_getattro */
2698 0, /* tp_setattro */
2699 0, /* tp_as_buffer */
2700 Py_TPFLAGS_DEFAULT, /* tp_flags */
2701 match_doc, /* tp_doc */
2702 0, /* tp_traverse */
2703 0, /* tp_clear */
2704 0, /* tp_richcompare */
2705 0, /* tp_weaklistoffset */
2706 0, /* tp_iter */
2707 0, /* tp_iternext */
2708 match_methods, /* tp_methods */
2709 match_members, /* tp_members */
2710 match_getset, /* tp_getset */
2711};
2712
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002713static PyMethodDef scanner_methods[] = {
2714 _SRE_SRE_SCANNER_MATCH_METHODDEF
2715 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2716 {NULL, NULL}
2717};
2718
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002719#define SCAN_OFF(x) offsetof(ScannerObject, x)
2720static PyMemberDef scanner_members[] = {
2721 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2722 {NULL} /* Sentinel */
2723};
2724
2725static PyTypeObject Scanner_Type = {
2726 PyVarObject_HEAD_INIT(NULL, 0)
2727 "_" SRE_MODULE ".SRE_Scanner",
2728 sizeof(ScannerObject), 0,
2729 (destructor)scanner_dealloc,/* tp_dealloc */
2730 0, /* tp_print */
2731 0, /* tp_getattr */
2732 0, /* tp_setattr */
2733 0, /* tp_reserved */
2734 0, /* tp_repr */
2735 0, /* tp_as_number */
2736 0, /* tp_as_sequence */
2737 0, /* tp_as_mapping */
2738 0, /* tp_hash */
2739 0, /* tp_call */
2740 0, /* tp_str */
2741 0, /* tp_getattro */
2742 0, /* tp_setattro */
2743 0, /* tp_as_buffer */
2744 Py_TPFLAGS_DEFAULT, /* tp_flags */
2745 0, /* tp_doc */
2746 0, /* tp_traverse */
2747 0, /* tp_clear */
2748 0, /* tp_richcompare */
2749 0, /* tp_weaklistoffset */
2750 0, /* tp_iter */
2751 0, /* tp_iternext */
2752 scanner_methods, /* tp_methods */
2753 scanner_members, /* tp_members */
2754 0, /* tp_getset */
2755};
2756
Guido van Rossumb700df92000-03-31 14:59:30 +00002757static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002758 _SRE_COMPILE_METHODDEF
2759 _SRE_GETCODESIZE_METHODDEF
Serhiy Storchaka6d336a02017-05-09 23:37:14 +03002760 _SRE_ASCII_ISCASED_METHODDEF
2761 _SRE_UNICODE_ISCASED_METHODDEF
Serhiy Storchaka7186cc22017-05-05 10:42:46 +03002762 _SRE_ASCII_TOLOWER_METHODDEF
2763 _SRE_UNICODE_TOLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002764 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002765};
2766
Martin v. Löwis1a214512008-06-11 05:26:20 +00002767static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002768 PyModuleDef_HEAD_INIT,
2769 "_" SRE_MODULE,
2770 NULL,
2771 -1,
2772 _functions,
2773 NULL,
2774 NULL,
2775 NULL,
2776 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002777};
2778
2779PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002780{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002781 PyObject* m;
2782 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002783 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002784
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002785 /* Patch object types */
2786 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2787 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002788 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002789
Martin v. Löwis1a214512008-06-11 05:26:20 +00002790 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002791 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002792 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002793 d = PyModule_GetDict(m);
2794
Christian Heimes217cfd12007-12-02 14:31:20 +00002795 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002796 if (x) {
2797 PyDict_SetItemString(d, "MAGIC", x);
2798 Py_DECREF(x);
2799 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002800
Christian Heimes217cfd12007-12-02 14:31:20 +00002801 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002802 if (x) {
2803 PyDict_SetItemString(d, "CODESIZE", x);
2804 Py_DECREF(x);
2805 }
2806
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002807 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2808 if (x) {
2809 PyDict_SetItemString(d, "MAXREPEAT", x);
2810 Py_DECREF(x);
2811 }
2812
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002813 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2814 if (x) {
2815 PyDict_SetItemString(d, "MAXGROUPS", x);
2816 Py_DECREF(x);
2817 }
2818
Neal Norwitzfe537132007-08-26 03:55:15 +00002819 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002820 if (x) {
2821 PyDict_SetItemString(d, "copyright", x);
2822 Py_DECREF(x);
2823 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002824 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002825}
2826
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002827/* vim:ts=4:sw=4:et
2828*/