blob: a86c5f252b5e589e609ca523e00cc3388bbbcdb9 [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02007 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
Guido van Rossumb700df92000-03-31 14:59:30 +000026 *
Fredrik Lundh770617b2001-01-14 15:06:11 +000027 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
Guido van Rossumb700df92000-03-31 14:59:30 +000028 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000029 * This version of the SRE library can be redistributed under CNRI's
30 * Python 1.6 license. For any other use, please contact Secret Labs
31 * AB (info@pythonware.com).
32 *
Guido van Rossumb700df92000-03-31 14:59:30 +000033 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000034 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000035 * other compatibility work.
36 */
37
/* Engine identification/copyright banner (kept as a file-local constant). */
static const char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063
Fredrik Lundh80946112000-06-29 18:03:25 +000064#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000065#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000066#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000067/* fastest possible local call under MSVC */
68#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000069#else
Benjamin Peterson791dc832017-04-20 23:52:19 -070070#define LOCAL(type) static inline type
Guido van Rossumb700df92000-03-31 14:59:30 +000071#endif
72
73/* error codes */
74#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000075#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000076#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000077#define SRE_ERROR_MEMORY -9 /* out of memory */
Christian Heimes2380ac72008-01-09 00:17:24 +000078#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000079
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000080#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000081#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000082#else
83#define TRACE(v)
84#endif
85
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000086/* -------------------------------------------------------------------- */
87/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000088
/* ASCII-only character predicates: every test except the line-break one
   rejects code points >= 128 before consulting the Py_IS* classifier. */
#define SRE_IS_DIGIT(ch) ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) < 128 && Py_ISSPACE(ch))
/* only '\n' counts as a line break here */
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_ALNUM(ch) ((ch) < 128 && Py_ISALNUM(ch))
/* word characters are the alphanumerics plus underscore */
#define SRE_IS_WORD(ch) ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +000099
/* Lowercase mapping restricted to ch < 128 (via Py_TOLOWER); any other
   value is returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
104
/* Uppercase mapping restricted to ch < 128 (via Py_TOUPPER); any other
   value is returned unchanged. */
static unsigned int sre_upper(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOUPPER(ch);
}
109
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000110/* locale-specific character predicates */
/* Locale-aware predicates built on <ctype.h>.
 *
 * For any unsigned c, (c & ~N) is nonzero exactly when c >= N+1.  The
 * bit test is used instead of a comparison to avoid compiler warnings
 * when c's type can only hold values < N+1. */
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
/* word characters are the locale's alphanumerics plus underscore */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
115
/* Lowercase mapping through the C library's tolower() for ch < 256;
   larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256) {
        return (unsigned int)tolower((int)ch);
    }
    return ch;
}
120
/* Uppercase mapping through the C library's toupper() for ch < 256;
   larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch < 256) {
        return (unsigned int)toupper((int)ch);
    }
    return ch;
}
125
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000126/* unicode-specific character predicates */
127
/* Unicode predicates: thin aliases for the Py_UNICODE_* classifiers. */
#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
/* word characters are the Unicode alphanumerics plus underscore */
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000133
/* Lowercase mapping delegated to Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
138
/* Uppercase mapping delegated to Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
143
Guido van Rossumb700df92000-03-31 14:59:30 +0000144LOCAL(int)
145sre_category(SRE_CODE category, unsigned int ch)
146{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000147 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000148
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000149 case SRE_CATEGORY_DIGIT:
150 return SRE_IS_DIGIT(ch);
151 case SRE_CATEGORY_NOT_DIGIT:
152 return !SRE_IS_DIGIT(ch);
153 case SRE_CATEGORY_SPACE:
154 return SRE_IS_SPACE(ch);
155 case SRE_CATEGORY_NOT_SPACE:
156 return !SRE_IS_SPACE(ch);
157 case SRE_CATEGORY_WORD:
158 return SRE_IS_WORD(ch);
159 case SRE_CATEGORY_NOT_WORD:
160 return !SRE_IS_WORD(ch);
161 case SRE_CATEGORY_LINEBREAK:
162 return SRE_IS_LINEBREAK(ch);
163 case SRE_CATEGORY_NOT_LINEBREAK:
164 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000165
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000166 case SRE_CATEGORY_LOC_WORD:
167 return SRE_LOC_IS_WORD(ch);
168 case SRE_CATEGORY_LOC_NOT_WORD:
169 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000170
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000171 case SRE_CATEGORY_UNI_DIGIT:
172 return SRE_UNI_IS_DIGIT(ch);
173 case SRE_CATEGORY_UNI_NOT_DIGIT:
174 return !SRE_UNI_IS_DIGIT(ch);
175 case SRE_CATEGORY_UNI_SPACE:
176 return SRE_UNI_IS_SPACE(ch);
177 case SRE_CATEGORY_UNI_NOT_SPACE:
178 return !SRE_UNI_IS_SPACE(ch);
179 case SRE_CATEGORY_UNI_WORD:
180 return SRE_UNI_IS_WORD(ch);
181 case SRE_CATEGORY_UNI_NOT_WORD:
182 return !SRE_UNI_IS_WORD(ch);
183 case SRE_CATEGORY_UNI_LINEBREAK:
184 return SRE_UNI_IS_LINEBREAK(ch);
185 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
186 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000187 }
188 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000189}
190
191/* helpers */
192
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000193static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000194data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000195{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000196 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000197 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000198 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000199 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000200 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000201}
202
203static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000204data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000205{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000206 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000207 minsize = state->data_stack_base+size;
208 cursize = state->data_stack_size;
209 if (cursize < minsize) {
210 void* stack;
211 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300212 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000213 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000214 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000215 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216 return SRE_ERROR_MEMORY;
217 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000218 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000219 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000220 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000221 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000222}
223
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000224/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000225
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300226#define SRE_CHAR Py_UCS1
227#define SIZEOF_SRE_CHAR 1
228#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300229#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000230
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300231/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000232
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300233#define SRE_CHAR Py_UCS2
234#define SIZEOF_SRE_CHAR 2
235#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300236#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000237
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300238/* generate 32-bit unicode version */
239
240#define SRE_CHAR Py_UCS4
241#define SIZEOF_SRE_CHAR 4
242#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300243#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000244
245/* -------------------------------------------------------------------- */
246/* factories and destructors */
247
248/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100249static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300250static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000251
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300252
253/*[clinic input]
254module _sre
255class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
256class _sre.SRE_Match "MatchObject *" "&Match_Type"
257class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
258[clinic start generated code]*/
259/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
260
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700261static PyTypeObject Pattern_Type;
262static PyTypeObject Match_Type;
263static PyTypeObject Scanner_Type;
264
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300265/*[clinic input]
266_sre.getcodesize -> int
267[clinic start generated code]*/
268
269static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300270_sre_getcodesize_impl(PyObject *module)
271/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000272{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300273 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000274}
275
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300276/*[clinic input]
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300277_sre.ascii_tolower -> int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300278
279 character: int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300280 /
281
282[clinic start generated code]*/
283
284static int
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300285_sre_ascii_tolower_impl(PyObject *module, int character)
286/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000287{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300288 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000289}
290
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300291/*[clinic input]
292_sre.unicode_tolower -> int
293
294 character: int
295 /
296
297[clinic start generated code]*/
298
299static int
300_sre_unicode_tolower_impl(PyObject *module, int character)
301/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
302{
303 return sre_lower_unicode(character);
304}
305
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000306LOCAL(void)
307state_reset(SRE_STATE* state)
308{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000309 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000310 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000311
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000312 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000313 state->lastindex = -1;
314
315 state->repeat = NULL;
316
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000317 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000318}
319
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000320static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200321getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300322 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600323 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000324{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000325 /* given a python object, return a data pointer, a length (in
326 characters), and a character size. return NULL if the object
327 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000328
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000329 /* Unicode objects do not support the buffer API. So, get the data
330 directly instead. */
331 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200332 if (PyUnicode_READY(string) == -1)
333 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200334 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200335 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300336 *p_isbytes = 0;
337 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000338 }
339
Victor Stinner0058b862011-09-29 03:27:47 +0200340 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300341 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200342 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300343 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000344 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000345
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300346 *p_length = view->len;
347 *p_charsize = 1;
348 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000349
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300350 if (view->buf == NULL) {
351 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
352 PyBuffer_Release(view);
353 view->buf = NULL;
354 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000355 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300356 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000357}
358
359LOCAL(PyObject*)
360state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000361 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000362{
363 /* prepare state object */
364
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000365 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300366 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000367 void* ptr;
368
369 memset(state, 0, sizeof(SRE_STATE));
370
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300371 state->mark = PyMem_New(void *, pattern->groups * 2);
372 if (!state->mark) {
373 PyErr_NoMemory();
374 goto err;
375 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000376 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000377 state->lastindex = -1;
378
Benjamin Petersone48944b2012-03-07 14:50:25 -0600379 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300380 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000381 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600382 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000383
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300384 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600385 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200386 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600387 goto err;
388 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300389 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600390 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200391 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600392 goto err;
393 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000395 /* adjust boundaries */
396 if (start < 0)
397 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000398 else if (start > length)
399 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000400
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000401 if (end < 0)
402 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000403 else if (end > length)
404 end = length;
405
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300406 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000407 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000410
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000411 state->start = (void*) ((char*) ptr + start * state->charsize);
412 state->end = (void*) ((char*) ptr + end * state->charsize);
413
414 Py_INCREF(string);
415 state->string = string;
416 state->pos = start;
417 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000418
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200419 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000420 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200421 state->upper = sre_upper_locale;
422 }
423 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000424 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200425 state->upper = sre_upper_unicode;
426 }
427 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000428 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200429 state->upper = sre_upper;
430 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000431
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000432 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600433 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300434 PyMem_Del(state->mark);
435 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600436 if (state->buffer.buf)
437 PyBuffer_Release(&state->buffer);
438 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000439}
440
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000441LOCAL(void)
442state_fini(SRE_STATE* state)
443{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600444 if (state->buffer.buf)
445 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000446 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000447 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300448 PyMem_Del(state->mark);
449 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000450}
451
/* Convert a pointer into the subject data (`member`) into an offset
   from the start of the string: byte distance from state->beginning
   divided by state->charsize. */
#define STATE_OFFSET(state, member) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
455
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000456LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300457getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300458 PyObject* string, Py_ssize_t start, Py_ssize_t end)
459{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300460 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300461 if (PyBytes_CheckExact(string) &&
462 start == 0 && end == PyBytes_GET_SIZE(string)) {
463 Py_INCREF(string);
464 return string;
465 }
466 return PyBytes_FromStringAndSize(
467 (const char *)ptr + start, end - start);
468 }
469 else {
470 return PyUnicode_Substring(string, start, end);
471 }
472}
473
474LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000475state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000476{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000477 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000478
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000479 index = (index - 1) * 2;
480
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000481 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000482 if (empty)
483 /* want empty string */
484 i = j = 0;
485 else {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200486 Py_RETURN_NONE;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000487 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000488 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000489 i = STATE_OFFSET(state, state->mark[index]);
490 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000491 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000492
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300493 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000494}
495
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000496static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100497pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000498{
499 switch (status) {
500 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400501 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000502 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400503 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000504 "maximum recursion limit exceeded"
505 );
506 break;
507 case SRE_ERROR_MEMORY:
508 PyErr_NoMemory();
509 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000510 case SRE_ERROR_INTERRUPTED:
511 /* An exception has already been raised, so let it fly */
512 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000513 default:
514 /* other error codes indicate compiler/engine bugs */
515 PyErr_SetString(
516 PyExc_RuntimeError,
517 "internal error in regular expression engine"
518 );
519 }
520}
521
Guido van Rossumb700df92000-03-31 14:59:30 +0000522static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000523pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000524{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000525 if (self->weakreflist != NULL)
526 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000527 Py_XDECREF(self->pattern);
528 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000529 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000530 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000531}
532
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300533LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300534sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300535{
536 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300537 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300538 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300539 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300540 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300541 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300542}
543
544LOCAL(Py_ssize_t)
545sre_search(SRE_STATE* state, SRE_CODE* pattern)
546{
547 if (state->charsize == 1)
548 return sre_ucs1_search(state, pattern);
549 if (state->charsize == 2)
550 return sre_ucs2_search(state, pattern);
551 assert(state->charsize == 4);
552 return sre_ucs4_search(state, pattern);
553}
554
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300555/*[clinic input]
556_sre.SRE_Pattern.match
557
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200558 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300559 pos: Py_ssize_t = 0
560 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300561
562Matches zero or more characters at the beginning of the string.
563[clinic start generated code]*/
564
Larry Hastings16c51912014-01-07 11:53:01 -0800565static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300566_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200567 Py_ssize_t pos, Py_ssize_t endpos)
568/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800569{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000570 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100571 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300572 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000573
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300574 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000576
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000577 state.ptr = state.start;
578
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000579 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
580
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300581 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000582
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000583 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300584 if (PyErr_Occurred()) {
585 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000586 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300587 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000588
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300589 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000590 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300591 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000592}
593
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300594/*[clinic input]
595_sre.SRE_Pattern.fullmatch
596
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200597 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300598 pos: Py_ssize_t = 0
599 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300600
601Matches against all of the string
602[clinic start generated code]*/
603
604static PyObject *
605_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200606 Py_ssize_t pos, Py_ssize_t endpos)
607/*[clinic end generated code: output=5833c47782a35f4a input=a6f640614aaefceb]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200608{
609 SRE_STATE state;
610 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300611 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200612
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300613 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200614 return NULL;
615
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200616 state.ptr = state.start;
617
618 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
619
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300620 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200621
622 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300623 if (PyErr_Occurred()) {
624 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200625 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300626 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200627
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300628 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200629 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300630 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200631}
632
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300633/*[clinic input]
634_sre.SRE_Pattern.search
635
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200636 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300637 pos: Py_ssize_t = 0
638 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300639
640Scan through string looking for a match, and return a corresponding match object instance.
641
642Return None if no position in the string matches.
643[clinic start generated code]*/
644
645static PyObject *
646_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200647 Py_ssize_t pos, Py_ssize_t endpos)
648/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000649{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000650 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100651 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300652 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000653
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300654 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000655 return NULL;
656
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000657 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
658
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300659 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000660
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000661 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
662
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300663 if (PyErr_Occurred()) {
664 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000665 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300666 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000667
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300668 match = pattern_new_match(self, &state, status);
669 state_fini(&state);
670 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000671}
672
673static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200674call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000675{
676 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000677 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000678 PyObject* func;
679 PyObject* result;
680
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000681 if (!args)
682 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000683 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000684 if (!name)
685 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000686 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000687 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000688 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000689 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000690 func = PyObject_GetAttrString(mod, function);
691 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000692 if (!func)
693 return NULL;
694 result = PyObject_CallObject(func, args);
695 Py_DECREF(func);
696 Py_DECREF(args);
697 return result;
698}
699
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300700/*[clinic input]
701_sre.SRE_Pattern.findall
702
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200703 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300704 pos: Py_ssize_t = 0
705 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300706
707Return a list of all non-overlapping matches of pattern in string.
708[clinic start generated code]*/
709
710static PyObject *
711_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200712 Py_ssize_t pos, Py_ssize_t endpos)
713/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000714{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000715 SRE_STATE state;
716 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100717 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000718 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000719
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300720 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000721 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000722
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000723 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000724 if (!list) {
725 state_fini(&state);
726 return NULL;
727 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000728
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000729 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000730
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000731 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000732
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000733 state_reset(&state);
734
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000735 state.ptr = state.start;
736
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300737 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300738 if (PyErr_Occurred())
739 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000740
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000741 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000742 if (status == 0)
743 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000744 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 }
Tim Peters3d563502006-01-21 02:47:53 +0000747
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000748 /* don't bother to build a match object */
749 switch (self->groups) {
750 case 0:
751 b = STATE_OFFSET(&state, state.start);
752 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300753 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300754 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000755 if (!item)
756 goto error;
757 break;
758 case 1:
759 item = state_getslice(&state, 1, string, 1);
760 if (!item)
761 goto error;
762 break;
763 default:
764 item = PyTuple_New(self->groups);
765 if (!item)
766 goto error;
767 for (i = 0; i < self->groups; i++) {
768 PyObject* o = state_getslice(&state, i+1, string, 1);
769 if (!o) {
770 Py_DECREF(item);
771 goto error;
772 }
773 PyTuple_SET_ITEM(item, i, o);
774 }
775 break;
776 }
777
778 status = PyList_Append(list, item);
779 Py_DECREF(item);
780 if (status < 0)
781 goto error;
782
783 if (state.ptr == state.start)
784 state.start = (void*) ((char*) state.ptr + state.charsize);
785 else
786 state.start = state.ptr;
787
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000788 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000789
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000790 state_fini(&state);
791 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000792
793error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000794 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000795 state_fini(&state);
796 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000797
Guido van Rossumb700df92000-03-31 14:59:30 +0000798}
799
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300800/*[clinic input]
801_sre.SRE_Pattern.finditer
802
803 string: object
804 pos: Py_ssize_t = 0
805 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
806
807Return an iterator over all non-overlapping matches for the RE pattern in string.
808
809For each match, the iterator returns a match object.
810[clinic start generated code]*/
811
812static PyObject *
813_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
814 Py_ssize_t pos, Py_ssize_t endpos)
815/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000816{
817 PyObject* scanner;
818 PyObject* search;
819 PyObject* iterator;
820
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300821 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000822 if (!scanner)
823 return NULL;
824
825 search = PyObject_GetAttrString(scanner, "search");
826 Py_DECREF(scanner);
827 if (!search)
828 return NULL;
829
830 iterator = PyCallIter_New(search, Py_None);
831 Py_DECREF(search);
832
833 return iterator;
834}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000835
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300836/*[clinic input]
837_sre.SRE_Pattern.scanner
838
839 string: object
840 pos: Py_ssize_t = 0
841 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
842
843[clinic start generated code]*/
844
845static PyObject *
846_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
847 Py_ssize_t pos, Py_ssize_t endpos)
848/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
849{
850 return pattern_scanner(self, string, pos, endpos);
851}
852
853/*[clinic input]
854_sre.SRE_Pattern.split
855
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200856 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300857 maxsplit: Py_ssize_t = 0
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300858
859Split string by the occurrences of pattern.
860[clinic start generated code]*/
861
862static PyObject *
863_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200864 Py_ssize_t maxsplit)
865/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000866{
867 SRE_STATE state;
868 PyObject* list;
869 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100870 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000871 Py_ssize_t n;
872 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000873 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000874
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200875 assert(self->codesize != 0);
876 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
877 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
878 PyErr_SetString(PyExc_ValueError,
879 "split() requires a non-empty pattern match.");
880 return NULL;
881 }
882 if (PyErr_WarnEx(PyExc_FutureWarning,
883 "split() requires a non-empty pattern match.",
884 1) < 0)
885 return NULL;
886 }
887
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300888 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000889 return NULL;
890
891 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000892 if (!list) {
893 state_fini(&state);
894 return NULL;
895 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000896
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000897 n = 0;
898 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000899
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000900 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000901
902 state_reset(&state);
903
904 state.ptr = state.start;
905
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300906 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300907 if (PyErr_Occurred())
908 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000909
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000910 if (status <= 0) {
911 if (status == 0)
912 break;
913 pattern_error(status);
914 goto error;
915 }
Tim Peters3d563502006-01-21 02:47:53 +0000916
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000917 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300918 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000919 break;
920 /* skip one character */
921 state.start = (void*) ((char*) state.ptr + state.charsize);
922 continue;
923 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000924
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000925 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300926 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000927 string, STATE_OFFSET(&state, last),
928 STATE_OFFSET(&state, state.start)
929 );
930 if (!item)
931 goto error;
932 status = PyList_Append(list, item);
933 Py_DECREF(item);
934 if (status < 0)
935 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000936
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000937 /* add groups (if any) */
938 for (i = 0; i < self->groups; i++) {
939 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000940 if (!item)
941 goto error;
942 status = PyList_Append(list, item);
943 Py_DECREF(item);
944 if (status < 0)
945 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000946 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000947
948 n = n + 1;
949
950 last = state.start = state.ptr;
951
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000952 }
953
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000954 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300955 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000956 string, STATE_OFFSET(&state, last), state.endpos
957 );
958 if (!item)
959 goto error;
960 status = PyList_Append(list, item);
961 Py_DECREF(item);
962 if (status < 0)
963 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000964
965 state_fini(&state);
966 return list;
967
968error:
969 Py_DECREF(list);
970 state_fini(&state);
971 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000972
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000973}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000974
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000975static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000976pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000977 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000978{
979 SRE_STATE state;
980 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300981 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000982 PyObject* item;
983 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000984 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000985 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100986 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000987 Py_ssize_t n;
988 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300989 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000990 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600991 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000992
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000993 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000994 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000995 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000996 Py_INCREF(filter);
997 filter_is_callable = 1;
998 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000999 /* if not callable, check if it's a literal string */
1000 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001001 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001002 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001003 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001004 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001005 if (charsize == 1)
1006 literal = memchr(ptr, '\\', n) == NULL;
1007 else
1008 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001009 } else {
1010 PyErr_Clear();
1011 literal = 0;
1012 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001013 if (view.buf)
1014 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001015 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001016 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001017 Py_INCREF(filter);
1018 filter_is_callable = 0;
1019 } else {
1020 /* not a literal; hand it over to the template compiler */
1021 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001022 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001023 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001024 );
1025 if (!filter)
1026 return NULL;
1027 filter_is_callable = PyCallable_Check(filter);
1028 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001029 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001030
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001031 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001032 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001033 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001034 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001035
1036 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001037 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001038 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001039 state_fini(&state);
1040 return NULL;
1041 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001042
1043 n = i = 0;
1044
1045 while (!count || n < count) {
1046
1047 state_reset(&state);
1048
1049 state.ptr = state.start;
1050
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001051 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001052 if (PyErr_Occurred())
1053 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001054
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001055 if (status <= 0) {
1056 if (status == 0)
1057 break;
1058 pattern_error(status);
1059 goto error;
1060 }
Tim Peters3d563502006-01-21 02:47:53 +00001061
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001062 b = STATE_OFFSET(&state, state.start);
1063 e = STATE_OFFSET(&state, state.ptr);
1064
1065 if (i < b) {
1066 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001067 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001068 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001069 if (!item)
1070 goto error;
1071 status = PyList_Append(list, item);
1072 Py_DECREF(item);
1073 if (status < 0)
1074 goto error;
1075
1076 } else if (i == b && i == e && n > 0)
1077 /* ignore empty match on latest position */
1078 goto next;
1079
1080 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001081 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001082 match = pattern_new_match(self, &state, 1);
1083 if (!match)
1084 goto error;
Victor Stinner7bfb42d2016-12-05 17:04:32 +01001085 item = PyObject_CallFunctionObjArgs(filter, match, NULL);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001086 Py_DECREF(match);
1087 if (!item)
1088 goto error;
1089 } else {
1090 /* filter is literal string */
1091 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001092 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001093 }
1094
1095 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001096 if (item != Py_None) {
1097 status = PyList_Append(list, item);
1098 Py_DECREF(item);
1099 if (status < 0)
1100 goto error;
1101 }
Tim Peters3d563502006-01-21 02:47:53 +00001102
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001103 i = e;
1104 n = n + 1;
1105
1106next:
1107 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001108 if (state.ptr == state.end)
1109 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001110 if (state.ptr == state.start)
1111 state.start = (void*) ((char*) state.ptr + state.charsize);
1112 else
1113 state.start = state.ptr;
1114
1115 }
1116
1117 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001118 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001119 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001120 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001121 if (!item)
1122 goto error;
1123 status = PyList_Append(list, item);
1124 Py_DECREF(item);
1125 if (status < 0)
1126 goto error;
1127 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001128
1129 state_fini(&state);
1130
Guido van Rossum4e173842001-12-07 04:25:10 +00001131 Py_DECREF(filter);
1132
Fredrik Lundhdac58492001-10-21 21:48:30 +00001133 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001134 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001135 if (!joiner) {
1136 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001137 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001138 }
1139 if (PyList_GET_SIZE(list) == 0) {
1140 Py_DECREF(list);
1141 item = joiner;
1142 }
1143 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001144 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001145 item = _PyBytes_Join(joiner, list);
1146 else
1147 item = PyUnicode_Join(joiner, list);
1148 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001149 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001150 if (!item)
1151 return NULL;
1152 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001153
1154 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001155 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001156
1157 return item;
1158
1159error:
1160 Py_DECREF(list);
1161 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001162 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001163 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001164
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001165}
1166
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001167/*[clinic input]
1168_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001169
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001170 repl: object
1171 string: object
1172 count: Py_ssize_t = 0
1173
1174Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1175[clinic start generated code]*/
1176
1177static PyObject *
1178_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1179 PyObject *string, Py_ssize_t count)
1180/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1181{
1182 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001183}
1184
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001185/*[clinic input]
1186_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001187
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001188 repl: object
1189 string: object
1190 count: Py_ssize_t = 0
1191
1192Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1193[clinic start generated code]*/
1194
1195static PyObject *
1196_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1197 PyObject *string, Py_ssize_t count)
1198/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1199{
1200 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001201}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001202
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001203/*[clinic input]
1204_sre.SRE_Pattern.__copy__
1205
1206[clinic start generated code]*/
1207
1208static PyObject *
1209_sre_SRE_Pattern___copy___impl(PatternObject *self)
1210/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001211{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001212 Py_INCREF(self);
1213 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001214}
1215
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001216/*[clinic input]
1217_sre.SRE_Pattern.__deepcopy__
1218
1219 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001220 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001221
1222[clinic start generated code]*/
1223
1224static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001225_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1226/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001227{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001228 Py_INCREF(self);
1229 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001230}
1231
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001232static PyObject *
1233pattern_repr(PatternObject *obj)
1234{
1235 static const struct {
1236 const char *name;
1237 int value;
1238 } flag_names[] = {
1239 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1240 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1241 {"re.LOCALE", SRE_FLAG_LOCALE},
1242 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1243 {"re.DOTALL", SRE_FLAG_DOTALL},
1244 {"re.UNICODE", SRE_FLAG_UNICODE},
1245 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1246 {"re.DEBUG", SRE_FLAG_DEBUG},
1247 {"re.ASCII", SRE_FLAG_ASCII},
1248 };
1249 PyObject *result = NULL;
1250 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001251 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001252 int flags = obj->flags;
1253
1254 /* Omit re.UNICODE for valid string patterns. */
1255 if (obj->isbytes == 0 &&
1256 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1257 SRE_FLAG_UNICODE)
1258 flags &= ~SRE_FLAG_UNICODE;
1259
1260 flag_items = PyList_New(0);
1261 if (!flag_items)
1262 return NULL;
1263
1264 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1265 if (flags & flag_names[i].value) {
1266 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1267 if (!item)
1268 goto done;
1269
1270 if (PyList_Append(flag_items, item) < 0) {
1271 Py_DECREF(item);
1272 goto done;
1273 }
1274 Py_DECREF(item);
1275 flags &= ~flag_names[i].value;
1276 }
1277 }
1278 if (flags) {
1279 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1280 if (!item)
1281 goto done;
1282
1283 if (PyList_Append(flag_items, item) < 0) {
1284 Py_DECREF(item);
1285 goto done;
1286 }
1287 Py_DECREF(item);
1288 }
1289
1290 if (PyList_Size(flag_items) > 0) {
1291 PyObject *flags_result;
1292 PyObject *sep = PyUnicode_FromString("|");
1293 if (!sep)
1294 goto done;
1295 flags_result = PyUnicode_Join(sep, flag_items);
1296 Py_DECREF(sep);
1297 if (!flags_result)
1298 goto done;
1299 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1300 obj->pattern, flags_result);
1301 Py_DECREF(flags_result);
1302 }
1303 else {
1304 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1305 }
1306
1307done:
1308 Py_DECREF(flag_items);
1309 return result;
1310}
1311
Raymond Hettinger94478742004-09-24 04:31:19 +00001312PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1313
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001314/* PatternObject's 'groupindex' method. */
1315static PyObject *
1316pattern_groupindex(PatternObject *self)
1317{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001318 if (self->groupindex == NULL)
1319 return PyDict_New();
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001320 return PyDictProxy_New(self->groupindex);
1321}
1322
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001323static int _validate(PatternObject *self); /* Forward */
1324
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001325/*[clinic input]
1326_sre.compile
1327
1328 pattern: object
1329 flags: int
1330 code: object(subclass_of='&PyList_Type')
1331 groups: Py_ssize_t
Victor Stinner726a57d2016-11-22 23:04:39 +01001332 groupindex: object(subclass_of='&PyDict_Type')
1333 indexgroup: object(subclass_of='&PyTuple_Type')
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001334
1335[clinic start generated code]*/
1336
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001337static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001338_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001339 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1340 PyObject *indexgroup)
Victor Stinner726a57d2016-11-22 23:04:39 +01001341/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001342{
1343 /* "compile" pattern descriptor to pattern object */
1344
1345 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001346 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001347
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001348 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001349 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001350 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1351 if (!self)
1352 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001353 self->weakreflist = NULL;
1354 self->pattern = NULL;
1355 self->groupindex = NULL;
1356 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001357
1358 self->codesize = n;
1359
1360 for (i = 0; i < n; i++) {
1361 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001362 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001363 self->code[i] = (SRE_CODE) value;
1364 if ((unsigned long) self->code[i] != value) {
1365 PyErr_SetString(PyExc_OverflowError,
1366 "regular expression code size limit exceeded");
1367 break;
1368 }
1369 }
1370
1371 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001372 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001373 return NULL;
1374 }
1375
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001377 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001378 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001379 else {
1380 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001381 int charsize;
1382 Py_buffer view;
1383 view.buf = NULL;
1384 if (!getstring(pattern, &p_length, &self->isbytes,
1385 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001386 Py_DECREF(self);
1387 return NULL;
1388 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001389 if (view.buf)
1390 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001391 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001392
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001393 Py_INCREF(pattern);
1394 self->pattern = pattern;
1395
1396 self->flags = flags;
1397
1398 self->groups = groups;
1399
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001400 if (PyDict_GET_SIZE(groupindex) > 0) {
1401 Py_INCREF(groupindex);
1402 self->groupindex = groupindex;
1403 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1404 Py_INCREF(indexgroup);
1405 self->indexgroup = indexgroup;
1406 }
1407 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001408
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001409 if (!_validate(self)) {
1410 Py_DECREF(self);
1411 return NULL;
1412 }
1413
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001414 return (PyObject*) self;
1415}
1416
/* -------------------------------------------------------------------- */
/* Code validation */

/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1441
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf() argument list, e.g. VTRACE(("x=%d\n", x)); it expands to nothing
   unless VVERBOSE is defined. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: trace the source line and make the enclosing function
   return 0. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the enclosing function: they read
   from 'code', compare it against 'end', and store into 'op', 'arg' or
   'skip'.  Each one FAILs (returns 0 from the enclosing function) when no
   code word is left to read. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and check that (skip - adj) does not reach beyond 'end',
   measured from the position of the skip word itself.  'adj' is
   parenthesized so that an expression argument keeps its meaning. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, (void *)(code+skip))); \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001482
1483static int
1484_validate_charset(SRE_CODE *code, SRE_CODE *end)
1485{
1486 /* Some variables are manipulated by the macros above */
1487 SRE_CODE op;
1488 SRE_CODE arg;
1489 SRE_CODE offset;
1490 int i;
1491
1492 while (code < end) {
1493 GET_OP;
1494 switch (op) {
1495
1496 case SRE_OP_NEGATE:
1497 break;
1498
1499 case SRE_OP_LITERAL:
1500 GET_ARG;
1501 break;
1502
1503 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001504 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001505 GET_ARG;
1506 GET_ARG;
1507 break;
1508
1509 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001510 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001511 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001512 FAIL;
1513 code += offset;
1514 break;
1515
1516 case SRE_OP_BIGCHARSET:
1517 GET_ARG; /* Number of blocks */
1518 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001519 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001520 FAIL;
1521 /* Make sure that each byte points to a valid block */
1522 for (i = 0; i < 256; i++) {
1523 if (((unsigned char *)code)[i] >= arg)
1524 FAIL;
1525 }
1526 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001527 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001528 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001529 FAIL;
1530 code += offset;
1531 break;
1532
1533 case SRE_OP_CATEGORY:
1534 GET_ARG;
1535 switch (arg) {
1536 case SRE_CATEGORY_DIGIT:
1537 case SRE_CATEGORY_NOT_DIGIT:
1538 case SRE_CATEGORY_SPACE:
1539 case SRE_CATEGORY_NOT_SPACE:
1540 case SRE_CATEGORY_WORD:
1541 case SRE_CATEGORY_NOT_WORD:
1542 case SRE_CATEGORY_LINEBREAK:
1543 case SRE_CATEGORY_NOT_LINEBREAK:
1544 case SRE_CATEGORY_LOC_WORD:
1545 case SRE_CATEGORY_LOC_NOT_WORD:
1546 case SRE_CATEGORY_UNI_DIGIT:
1547 case SRE_CATEGORY_UNI_NOT_DIGIT:
1548 case SRE_CATEGORY_UNI_SPACE:
1549 case SRE_CATEGORY_UNI_NOT_SPACE:
1550 case SRE_CATEGORY_UNI_WORD:
1551 case SRE_CATEGORY_UNI_NOT_WORD:
1552 case SRE_CATEGORY_UNI_LINEBREAK:
1553 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1554 break;
1555 default:
1556 FAIL;
1557 }
1558 break;
1559
1560 default:
1561 FAIL;
1562
1563 }
1564 }
1565
1566 return 1;
1567}
1568
1569static int
1570_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1571{
1572 /* Some variables are manipulated by the macros above */
1573 SRE_CODE op;
1574 SRE_CODE arg;
1575 SRE_CODE skip;
1576
1577 VTRACE(("code=%p, end=%p\n", code, end));
1578
1579 if (code > end)
1580 FAIL;
1581
1582 while (code < end) {
1583 GET_OP;
1584 switch (op) {
1585
1586 case SRE_OP_MARK:
1587 /* We don't check whether marks are properly nested; the
1588 sre_match() code is robust even if they don't, and the worst
1589 you can get is nonsensical match results. */
1590 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001591 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001592 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1593 FAIL;
1594 }
1595 break;
1596
1597 case SRE_OP_LITERAL:
1598 case SRE_OP_NOT_LITERAL:
1599 case SRE_OP_LITERAL_IGNORE:
1600 case SRE_OP_NOT_LITERAL_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001601 case SRE_OP_LITERAL_LOC_IGNORE:
1602 case SRE_OP_NOT_LITERAL_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001603 GET_ARG;
1604 /* The arg is just a character, nothing to check */
1605 break;
1606
1607 case SRE_OP_SUCCESS:
1608 case SRE_OP_FAILURE:
1609 /* Nothing to check; these normally end the matching process */
1610 break;
1611
1612 case SRE_OP_AT:
1613 GET_ARG;
1614 switch (arg) {
1615 case SRE_AT_BEGINNING:
1616 case SRE_AT_BEGINNING_STRING:
1617 case SRE_AT_BEGINNING_LINE:
1618 case SRE_AT_END:
1619 case SRE_AT_END_LINE:
1620 case SRE_AT_END_STRING:
1621 case SRE_AT_BOUNDARY:
1622 case SRE_AT_NON_BOUNDARY:
1623 case SRE_AT_LOC_BOUNDARY:
1624 case SRE_AT_LOC_NON_BOUNDARY:
1625 case SRE_AT_UNI_BOUNDARY:
1626 case SRE_AT_UNI_NON_BOUNDARY:
1627 break;
1628 default:
1629 FAIL;
1630 }
1631 break;
1632
1633 case SRE_OP_ANY:
1634 case SRE_OP_ANY_ALL:
1635 /* These have no operands */
1636 break;
1637
1638 case SRE_OP_IN:
1639 case SRE_OP_IN_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001640 case SRE_OP_IN_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001641 GET_SKIP;
1642 /* Stop 1 before the end; we check the FAILURE below */
1643 if (!_validate_charset(code, code+skip-2))
1644 FAIL;
1645 if (code[skip-2] != SRE_OP_FAILURE)
1646 FAIL;
1647 code += skip-1;
1648 break;
1649
1650 case SRE_OP_INFO:
1651 {
1652 /* A minimal info field is
1653 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1654 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1655 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001656 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001657 SRE_CODE *newcode;
1658 GET_SKIP;
1659 newcode = code+skip-1;
1660 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001661 GET_ARG;
1662 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001663 /* Check that only valid flags are present */
1664 if ((flags & ~(SRE_INFO_PREFIX |
1665 SRE_INFO_LITERAL |
1666 SRE_INFO_CHARSET)) != 0)
1667 FAIL;
1668 /* PREFIX and CHARSET are mutually exclusive */
1669 if ((flags & SRE_INFO_PREFIX) &&
1670 (flags & SRE_INFO_CHARSET))
1671 FAIL;
1672 /* LITERAL implies PREFIX */
1673 if ((flags & SRE_INFO_LITERAL) &&
1674 !(flags & SRE_INFO_PREFIX))
1675 FAIL;
1676 /* Validate the prefix */
1677 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001678 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001679 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001680 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001681 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001682 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001683 FAIL;
1684 code += prefix_len;
1685 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001686 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001687 FAIL;
1688 /* Each overlap value should be < prefix_len */
1689 for (i = 0; i < prefix_len; i++) {
1690 if (code[i] >= prefix_len)
1691 FAIL;
1692 }
1693 code += prefix_len;
1694 }
1695 /* Validate the charset */
1696 if (flags & SRE_INFO_CHARSET) {
1697 if (!_validate_charset(code, newcode-1))
1698 FAIL;
1699 if (newcode[-1] != SRE_OP_FAILURE)
1700 FAIL;
1701 code = newcode;
1702 }
1703 else if (code != newcode) {
1704 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1705 FAIL;
1706 }
1707 }
1708 break;
1709
1710 case SRE_OP_BRANCH:
1711 {
1712 SRE_CODE *target = NULL;
1713 for (;;) {
1714 GET_SKIP;
1715 if (skip == 0)
1716 break;
1717 /* Stop 2 before the end; we check the JUMP below */
1718 if (!_validate_inner(code, code+skip-3, groups))
1719 FAIL;
1720 code += skip-3;
1721 /* Check that it ends with a JUMP, and that each JUMP
1722 has the same target */
1723 GET_OP;
1724 if (op != SRE_OP_JUMP)
1725 FAIL;
1726 GET_SKIP;
1727 if (target == NULL)
1728 target = code+skip-1;
1729 else if (code+skip-1 != target)
1730 FAIL;
1731 }
1732 }
1733 break;
1734
1735 case SRE_OP_REPEAT_ONE:
1736 case SRE_OP_MIN_REPEAT_ONE:
1737 {
1738 SRE_CODE min, max;
1739 GET_SKIP;
1740 GET_ARG; min = arg;
1741 GET_ARG; max = arg;
1742 if (min > max)
1743 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001744 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001745 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001746 if (!_validate_inner(code, code+skip-4, groups))
1747 FAIL;
1748 code += skip-4;
1749 GET_OP;
1750 if (op != SRE_OP_SUCCESS)
1751 FAIL;
1752 }
1753 break;
1754
1755 case SRE_OP_REPEAT:
1756 {
1757 SRE_CODE min, max;
1758 GET_SKIP;
1759 GET_ARG; min = arg;
1760 GET_ARG; max = arg;
1761 if (min > max)
1762 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001763 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001764 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001765 if (!_validate_inner(code, code+skip-3, groups))
1766 FAIL;
1767 code += skip-3;
1768 GET_OP;
1769 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1770 FAIL;
1771 }
1772 break;
1773
1774 case SRE_OP_GROUPREF:
1775 case SRE_OP_GROUPREF_IGNORE:
1776 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001777 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001778 FAIL;
1779 break;
1780
1781 case SRE_OP_GROUPREF_EXISTS:
1782 /* The regex syntax for this is: '(?(group)then|else)', where
1783 'group' is either an integer group number or a group name,
1784 'then' and 'else' are sub-regexes, and 'else' is optional. */
1785 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001786 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001787 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001788 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001789 code--; /* The skip is relative to the first arg! */
1790 /* There are two possibilities here: if there is both a 'then'
1791 part and an 'else' part, the generated code looks like:
1792
1793 GROUPREF_EXISTS
1794 <group>
1795 <skipyes>
1796 ...then part...
1797 JUMP
1798 <skipno>
1799 (<skipyes> jumps here)
1800 ...else part...
1801 (<skipno> jumps here)
1802
1803 If there is only a 'then' part, it looks like:
1804
1805 GROUPREF_EXISTS
1806 <group>
1807 <skip>
1808 ...then part...
1809 (<skip> jumps here)
1810
1811 There is no direct way to decide which it is, and we don't want
1812 to allow arbitrary jumps anywhere in the code; so we just look
1813 for a JUMP opcode preceding our skip target.
1814 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001815 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001816 code[skip-3] == SRE_OP_JUMP)
1817 {
1818 VTRACE(("both then and else parts present\n"));
1819 if (!_validate_inner(code+1, code+skip-3, groups))
1820 FAIL;
1821 code += skip-2; /* Position after JUMP, at <skipno> */
1822 GET_SKIP;
1823 if (!_validate_inner(code, code+skip-1, groups))
1824 FAIL;
1825 code += skip-1;
1826 }
1827 else {
1828 VTRACE(("only a then part present\n"));
1829 if (!_validate_inner(code+1, code+skip-1, groups))
1830 FAIL;
1831 code += skip-1;
1832 }
1833 break;
1834
1835 case SRE_OP_ASSERT:
1836 case SRE_OP_ASSERT_NOT:
1837 GET_SKIP;
1838 GET_ARG; /* 0 for lookahead, width for lookbehind */
1839 code--; /* Back up over arg to simplify math below */
1840 if (arg & 0x80000000)
1841 FAIL; /* Width too large */
1842 /* Stop 1 before the end; we check the SUCCESS below */
1843 if (!_validate_inner(code+1, code+skip-2, groups))
1844 FAIL;
1845 code += skip-2;
1846 GET_OP;
1847 if (op != SRE_OP_SUCCESS)
1848 FAIL;
1849 break;
1850
1851 default:
1852 FAIL;
1853
1854 }
1855 }
1856
1857 VTRACE(("okay\n"));
1858 return 1;
1859}
1860
1861static int
1862_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1863{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001864 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1865 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001866 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001867 return _validate_inner(code, end-1, groups);
1868}
1869
1870static int
1871_validate(PatternObject *self)
1872{
1873 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1874 {
1875 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1876 return 0;
1877 }
1878 else
1879 VTRACE(("Success!\n"));
1880 return 1;
1881}
1882
1883/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001884/* match methods */
1885
1886static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001887match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001888{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001889 Py_XDECREF(self->regs);
1890 Py_XDECREF(self->string);
1891 Py_DECREF(self->pattern);
1892 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001893}
1894
1895static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001896match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001897{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001898 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001899 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001900 Py_buffer view;
1901 PyObject *result;
1902 void* ptr;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001903 Py_ssize_t i, j;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001904
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001905 if (index < 0 || index >= self->groups) {
1906 /* raise IndexError if we were given a bad group number */
1907 PyErr_SetString(
1908 PyExc_IndexError,
1909 "no such group"
1910 );
1911 return NULL;
1912 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001913
Fredrik Lundh6f013982000-07-03 18:44:21 +00001914 index *= 2;
1915
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001916 if (self->string == Py_None || self->mark[index] < 0) {
1917 /* return default value if the string or group is undefined */
1918 Py_INCREF(def);
1919 return def;
1920 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001921
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001922 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001923 if (ptr == NULL)
1924 return NULL;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001925
1926 i = self->mark[index];
1927 j = self->mark[index+1];
1928 i = Py_MIN(i, length);
1929 j = Py_MIN(j, length);
1930 result = getslice(isbytes, ptr, self->string, i, j);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001931 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001932 PyBuffer_Release(&view);
1933 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001934}
1935
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001936static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001937match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001938{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001939 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001940
Guido van Rossumddefaf32007-01-14 03:31:43 +00001941 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001942 /* Default value */
1943 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001944
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001945 if (PyIndex_Check(index)) {
1946 return PyNumber_AsSsize_t(index, NULL);
1947 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001948
Fredrik Lundh6f013982000-07-03 18:44:21 +00001949 i = -1;
1950
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001951 if (self->pattern->groupindex) {
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001952 index = PyDict_GetItem(self->pattern->groupindex, index);
1953 if (index && PyLong_Check(index)) {
1954 i = PyLong_AsSsize_t(index);
1955 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001956 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001957
1958 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001959}
1960
1961static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001962match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001963{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001964 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001965}
1966
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001967/*[clinic input]
1968_sre.SRE_Match.expand
1969
1970 template: object
1971
1972Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1973[clinic start generated code]*/
1974
1975static PyObject *
1976_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1977/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001978{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001979 /* delegate to Python code */
1980 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001981 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001982 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001983 );
1984}
1985
1986static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001987match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001988{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001989 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001990 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001991
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001992 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001993
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001994 switch (size) {
1995 case 0:
Serhiy Storchakaba85d692017-03-30 09:09:41 +03001996 result = match_getslice(self, _PyLong_Zero, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001997 break;
1998 case 1:
1999 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2000 break;
2001 default:
2002 /* fetch multiple items */
2003 result = PyTuple_New(size);
2004 if (!result)
2005 return NULL;
2006 for (i = 0; i < size; i++) {
2007 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002008 self, PyTuple_GET_ITEM(args, i), Py_None
2009 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002010 if (!item) {
2011 Py_DECREF(result);
2012 return NULL;
2013 }
2014 PyTuple_SET_ITEM(result, i, item);
2015 }
2016 break;
2017 }
2018 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002019}
2020
Eric V. Smith605bdae2016-09-11 08:55:43 -04002021static PyObject*
2022match_getitem(MatchObject* self, PyObject* name)
2023{
2024 return match_getslice(self, name, Py_None);
2025}
2026
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002027/*[clinic input]
2028_sre.SRE_Match.groups
2029
2030 default: object = None
2031 Is used for groups that did not participate in the match.
2032
2033Return a tuple containing all the subgroups of the match, from 1.
2034[clinic start generated code]*/
2035
2036static PyObject *
2037_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2038/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002039{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002040 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002041 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002042
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002043 result = PyTuple_New(self->groups-1);
2044 if (!result)
2045 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002046
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002047 for (index = 1; index < self->groups; index++) {
2048 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002049 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002050 if (!item) {
2051 Py_DECREF(result);
2052 return NULL;
2053 }
2054 PyTuple_SET_ITEM(result, index-1, item);
2055 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002056
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002057 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002058}
2059
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002060/*[clinic input]
2061_sre.SRE_Match.groupdict
2062
2063 default: object = None
2064 Is used for groups that did not participate in the match.
2065
2066Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2067[clinic start generated code]*/
2068
2069static PyObject *
2070_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2071/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002072{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002073 PyObject *result;
2074 PyObject *key;
2075 PyObject *value;
2076 Py_ssize_t pos = 0;
2077 Py_hash_t hash;
Guido van Rossumb700df92000-03-31 14:59:30 +00002078
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002079 result = PyDict_New();
2080 if (!result || !self->pattern->groupindex)
2081 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002082
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002083 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002084 int status;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002085 Py_INCREF(key);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002086 value = match_getslice(self, key, default_value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002087 if (!value) {
2088 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002089 goto failed;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002090 }
2091 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002092 Py_DECREF(value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002093 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002094 if (status < 0)
2095 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002096 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002097
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002098 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002099
2100failed:
Fredrik Lundh770617b2001-01-14 15:06:11 +00002101 Py_DECREF(result);
2102 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002103}
2104
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002105/*[clinic input]
2106_sre.SRE_Match.start -> Py_ssize_t
2107
2108 group: object(c_default="NULL") = 0
2109 /
2110
2111Return index of the start of the substring matched by group.
2112[clinic start generated code]*/
2113
2114static Py_ssize_t
2115_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2116/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002117{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002118 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002119
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002120 if (index < 0 || index >= self->groups) {
2121 PyErr_SetString(
2122 PyExc_IndexError,
2123 "no such group"
2124 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002125 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002126 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002127
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002128 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002129 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002130}
2131
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002132/*[clinic input]
2133_sre.SRE_Match.end -> Py_ssize_t
2134
2135 group: object(c_default="NULL") = 0
2136 /
2137
2138Return index of the end of the substring matched by group.
2139[clinic start generated code]*/
2140
2141static Py_ssize_t
2142_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2143/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002144{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002145 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002146
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002147 if (index < 0 || index >= self->groups) {
2148 PyErr_SetString(
2149 PyExc_IndexError,
2150 "no such group"
2151 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002152 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002153 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002154
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002155 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002156 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002157}
2158
2159LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002160_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002161{
2162 PyObject* pair;
2163 PyObject* item;
2164
2165 pair = PyTuple_New(2);
2166 if (!pair)
2167 return NULL;
2168
Christian Heimes217cfd12007-12-02 14:31:20 +00002169 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002170 if (!item)
2171 goto error;
2172 PyTuple_SET_ITEM(pair, 0, item);
2173
Christian Heimes217cfd12007-12-02 14:31:20 +00002174 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002175 if (!item)
2176 goto error;
2177 PyTuple_SET_ITEM(pair, 1, item);
2178
2179 return pair;
2180
2181 error:
2182 Py_DECREF(pair);
2183 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002184}
2185
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002186/*[clinic input]
2187_sre.SRE_Match.span
2188
2189 group: object(c_default="NULL") = 0
2190 /
2191
2192For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2193[clinic start generated code]*/
2194
2195static PyObject *
2196_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2197/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002198{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002199 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002200
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002201 if (index < 0 || index >= self->groups) {
2202 PyErr_SetString(
2203 PyExc_IndexError,
2204 "no such group"
2205 );
2206 return NULL;
2207 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002208
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002209 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002210 return _pair(self->mark[index*2], self->mark[index*2+1]);
2211}
2212
2213static PyObject*
2214match_regs(MatchObject* self)
2215{
2216 PyObject* regs;
2217 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002218 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002219
2220 regs = PyTuple_New(self->groups);
2221 if (!regs)
2222 return NULL;
2223
2224 for (index = 0; index < self->groups; index++) {
2225 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2226 if (!item) {
2227 Py_DECREF(regs);
2228 return NULL;
2229 }
2230 PyTuple_SET_ITEM(regs, index, item);
2231 }
2232
2233 Py_INCREF(regs);
2234 self->regs = regs;
2235
2236 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002237}
2238
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002239/*[clinic input]
2240_sre.SRE_Match.__copy__
2241
2242[clinic start generated code]*/
2243
2244static PyObject *
2245_sre_SRE_Match___copy___impl(MatchObject *self)
2246/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002247{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002248 Py_INCREF(self);
2249 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002250}
2251
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002252/*[clinic input]
2253_sre.SRE_Match.__deepcopy__
2254
2255 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002256 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002257
2258[clinic start generated code]*/
2259
2260static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002261_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2262/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002263{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002264 Py_INCREF(self);
2265 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002266}
2267
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002268PyDoc_STRVAR(match_doc,
2269"The result of re.match() and re.search().\n\
2270Match objects always have a boolean value of True.");
2271
2272PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002273"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002274 Return subgroup(s) of the match by indices or names.\n\
2275 For 0 returns the entire match.");
2276
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002277static PyObject *
2278match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002279{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002280 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002281 return PyLong_FromSsize_t(self->lastindex);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002282 Py_RETURN_NONE;
Guido van Rossumb700df92000-03-31 14:59:30 +00002283}
2284
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002285static PyObject *
2286match_lastgroup_get(MatchObject *self)
2287{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002288 if (self->pattern->indexgroup &&
2289 self->lastindex >= 0 &&
2290 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2291 {
2292 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2293 self->lastindex);
2294 Py_INCREF(result);
2295 return result;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002296 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002297 Py_RETURN_NONE;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002298}
2299
2300static PyObject *
2301match_regs_get(MatchObject *self)
2302{
2303 if (self->regs) {
2304 Py_INCREF(self->regs);
2305 return self->regs;
2306 } else
2307 return match_regs(self);
2308}
2309
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002310static PyObject *
2311match_repr(MatchObject *self)
2312{
2313 PyObject *result;
2314 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2315 if (group0 == NULL)
2316 return NULL;
2317 result = PyUnicode_FromFormat(
2318 "<%s object; span=(%d, %d), match=%.50R>",
2319 Py_TYPE(self)->tp_name,
2320 self->mark[0], self->mark[1], group0);
2321 Py_DECREF(group0);
2322 return result;
2323}
2324
2325
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002326static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002327pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002328{
2329 /* create match object (from state object) */
2330
2331 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002332 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002333 char* base;
2334 int n;
2335
2336 if (status > 0) {
2337
2338 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002339 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002340 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2341 2*(pattern->groups+1));
2342 if (!match)
2343 return NULL;
2344
2345 Py_INCREF(pattern);
2346 match->pattern = pattern;
2347
2348 Py_INCREF(state->string);
2349 match->string = state->string;
2350
2351 match->regs = NULL;
2352 match->groups = pattern->groups+1;
2353
2354 /* fill in group slices */
2355
2356 base = (char*) state->beginning;
2357 n = state->charsize;
2358
2359 match->mark[0] = ((char*) state->start - base) / n;
2360 match->mark[1] = ((char*) state->ptr - base) / n;
2361
2362 for (i = j = 0; i < pattern->groups; i++, j+=2)
2363 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2364 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2365 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2366 } else
2367 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2368
2369 match->pos = state->pos;
2370 match->endpos = state->endpos;
2371
2372 match->lastindex = state->lastindex;
2373
2374 return (PyObject*) match;
2375
2376 } else if (status == 0) {
2377
2378 /* no match */
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002379 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002380
2381 }
2382
2383 /* internal error */
2384 pattern_error(status);
2385 return NULL;
2386}
2387
2388
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002389/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002390/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002391
2392static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002393scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002394{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002395 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002396 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002397 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002398}
2399
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002400/*[clinic input]
2401_sre.SRE_Scanner.match
2402
2403[clinic start generated code]*/
2404
2405static PyObject *
2406_sre_SRE_Scanner_match_impl(ScannerObject *self)
2407/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002408{
2409 SRE_STATE* state = &self->state;
2410 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002411 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002412
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002413 if (state->start == NULL)
2414 Py_RETURN_NONE;
2415
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002416 state_reset(state);
2417
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002418 state->ptr = state->start;
2419
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002420 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002421 if (PyErr_Occurred())
2422 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002423
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002424 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002425 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002426
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002427 if (status == 0)
2428 state->start = NULL;
2429 else if (state->ptr != state->start)
2430 state->start = state->ptr;
2431 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002432 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002433 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002434 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002435
2436 return match;
2437}
2438
2439
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002440/*[clinic input]
2441_sre.SRE_Scanner.search
2442
2443[clinic start generated code]*/
2444
2445static PyObject *
2446_sre_SRE_Scanner_search_impl(ScannerObject *self)
2447/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002448{
2449 SRE_STATE* state = &self->state;
2450 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002451 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002452
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002453 if (state->start == NULL)
2454 Py_RETURN_NONE;
2455
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002456 state_reset(state);
2457
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002458 state->ptr = state->start;
2459
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002460 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002461 if (PyErr_Occurred())
2462 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002463
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002464 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002465 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002466
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002467 if (status == 0)
2468 state->start = NULL;
2469 else if (state->ptr != state->start)
2470 state->start = state->ptr;
2471 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002472 state->start = (void*) ((char*) state->ptr + state->charsize);
2473 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002474 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002475
2476 return match;
2477}
2478
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002479static PyObject *
2480pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002481{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002482 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002483
2484 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002485 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2486 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002487 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002488 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002489
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002490 /* create search state object */
2491 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2492 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002493 return NULL;
2494 }
2495
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002496 Py_INCREF(self);
2497 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002498
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002499 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002500}
2501
Victor Stinnerb44fb122016-11-21 16:35:08 +01002502static Py_hash_t
2503pattern_hash(PatternObject *self)
2504{
2505 Py_hash_t hash, hash2;
2506
2507 hash = PyObject_Hash(self->pattern);
2508 if (hash == -1) {
2509 return -1;
2510 }
2511
2512 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2513 hash ^= hash2;
2514
2515 hash ^= self->flags;
2516 hash ^= self->isbytes;
2517 hash ^= self->codesize;
2518
2519 if (hash == -1) {
2520 hash = -2;
2521 }
2522 return hash;
2523}
2524
2525static PyObject*
2526pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2527{
2528 PatternObject *left, *right;
2529 int cmp;
2530
2531 if (op != Py_EQ && op != Py_NE) {
2532 Py_RETURN_NOTIMPLEMENTED;
2533 }
2534
2535 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2536 Py_RETURN_NOTIMPLEMENTED;
2537 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002538
2539 if (lefto == righto) {
2540 /* a pattern is equal to itself */
2541 return PyBool_FromLong(op == Py_EQ);
2542 }
2543
Victor Stinnerb44fb122016-11-21 16:35:08 +01002544 left = (PatternObject *)lefto;
2545 right = (PatternObject *)righto;
2546
2547 cmp = (left->flags == right->flags
2548 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002549 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002550 if (cmp) {
2551 /* Compare the code and the pattern because the same pattern can
2552 produce different codes depending on the locale used to compile the
2553 pattern when the re.LOCALE flag is used. Don't compare groups,
2554 indexgroup nor groupindex: they are derivated from the pattern. */
2555 cmp = (memcmp(left->code, right->code,
2556 sizeof(left->code[0]) * left->codesize) == 0);
2557 }
2558 if (cmp) {
2559 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2560 Py_EQ);
2561 if (cmp < 0) {
2562 return NULL;
2563 }
2564 }
2565 if (op == Py_NE) {
2566 cmp = !cmp;
2567 }
2568 return PyBool_FromLong(cmp);
2569}
2570
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002571#include "clinic/_sre.c.h"
2572
2573static PyMethodDef pattern_methods[] = {
2574 _SRE_SRE_PATTERN_MATCH_METHODDEF
2575 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2576 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2577 _SRE_SRE_PATTERN_SUB_METHODDEF
2578 _SRE_SRE_PATTERN_SUBN_METHODDEF
2579 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2580 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2581 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2582 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2583 _SRE_SRE_PATTERN___COPY___METHODDEF
2584 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2585 {NULL, NULL}
2586};
2587
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002588static PyGetSetDef pattern_getset[] = {
2589 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2590 "A dictionary mapping group names to group numbers."},
2591 {NULL} /* Sentinel */
2592};
2593
2594#define PAT_OFF(x) offsetof(PatternObject, x)
2595static PyMemberDef pattern_members[] = {
2596 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2597 {"flags", T_INT, PAT_OFF(flags), READONLY},
2598 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2599 {NULL} /* Sentinel */
2600};
2601
2602static PyTypeObject Pattern_Type = {
2603 PyVarObject_HEAD_INIT(NULL, 0)
2604 "_" SRE_MODULE ".SRE_Pattern",
2605 sizeof(PatternObject), sizeof(SRE_CODE),
2606 (destructor)pattern_dealloc, /* tp_dealloc */
2607 0, /* tp_print */
2608 0, /* tp_getattr */
2609 0, /* tp_setattr */
2610 0, /* tp_reserved */
2611 (reprfunc)pattern_repr, /* tp_repr */
2612 0, /* tp_as_number */
2613 0, /* tp_as_sequence */
2614 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002615 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002616 0, /* tp_call */
2617 0, /* tp_str */
2618 0, /* tp_getattro */
2619 0, /* tp_setattro */
2620 0, /* tp_as_buffer */
2621 Py_TPFLAGS_DEFAULT, /* tp_flags */
2622 pattern_doc, /* tp_doc */
2623 0, /* tp_traverse */
2624 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002625 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002626 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2627 0, /* tp_iter */
2628 0, /* tp_iternext */
2629 pattern_methods, /* tp_methods */
2630 pattern_members, /* tp_members */
2631 pattern_getset, /* tp_getset */
2632};
2633
Eric V. Smith605bdae2016-09-11 08:55:43 -04002634/* Match objects do not support length or assignment, but do support
2635 __getitem__. */
2636static PyMappingMethods match_as_mapping = {
2637 NULL,
2638 (binaryfunc)match_getitem,
2639 NULL
2640};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002641
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002642static PyMethodDef match_methods[] = {
2643 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2644 _SRE_SRE_MATCH_START_METHODDEF
2645 _SRE_SRE_MATCH_END_METHODDEF
2646 _SRE_SRE_MATCH_SPAN_METHODDEF
2647 _SRE_SRE_MATCH_GROUPS_METHODDEF
2648 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2649 _SRE_SRE_MATCH_EXPAND_METHODDEF
2650 _SRE_SRE_MATCH___COPY___METHODDEF
2651 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2652 {NULL, NULL}
2653};
2654
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002655static PyGetSetDef match_getset[] = {
2656 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2657 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2658 {"regs", (getter)match_regs_get, (setter)NULL},
2659 {NULL}
2660};
2661
2662#define MATCH_OFF(x) offsetof(MatchObject, x)
2663static PyMemberDef match_members[] = {
2664 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2665 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2666 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2667 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2668 {NULL}
2669};
2670
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2673
2674static PyTypeObject Match_Type = {
2675 PyVarObject_HEAD_INIT(NULL,0)
2676 "_" SRE_MODULE ".SRE_Match",
2677 sizeof(MatchObject), sizeof(Py_ssize_t),
2678 (destructor)match_dealloc, /* tp_dealloc */
2679 0, /* tp_print */
2680 0, /* tp_getattr */
2681 0, /* tp_setattr */
2682 0, /* tp_reserved */
2683 (reprfunc)match_repr, /* tp_repr */
2684 0, /* tp_as_number */
2685 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002686 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002687 0, /* tp_hash */
2688 0, /* tp_call */
2689 0, /* tp_str */
2690 0, /* tp_getattro */
2691 0, /* tp_setattro */
2692 0, /* tp_as_buffer */
2693 Py_TPFLAGS_DEFAULT, /* tp_flags */
2694 match_doc, /* tp_doc */
2695 0, /* tp_traverse */
2696 0, /* tp_clear */
2697 0, /* tp_richcompare */
2698 0, /* tp_weaklistoffset */
2699 0, /* tp_iter */
2700 0, /* tp_iternext */
2701 match_methods, /* tp_methods */
2702 match_members, /* tp_members */
2703 match_getset, /* tp_getset */
2704};
2705
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002706static PyMethodDef scanner_methods[] = {
2707 _SRE_SRE_SCANNER_MATCH_METHODDEF
2708 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2709 {NULL, NULL}
2710};
2711
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002712#define SCAN_OFF(x) offsetof(ScannerObject, x)
2713static PyMemberDef scanner_members[] = {
2714 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2715 {NULL} /* Sentinel */
2716};
2717
2718static PyTypeObject Scanner_Type = {
2719 PyVarObject_HEAD_INIT(NULL, 0)
2720 "_" SRE_MODULE ".SRE_Scanner",
2721 sizeof(ScannerObject), 0,
2722 (destructor)scanner_dealloc,/* tp_dealloc */
2723 0, /* tp_print */
2724 0, /* tp_getattr */
2725 0, /* tp_setattr */
2726 0, /* tp_reserved */
2727 0, /* tp_repr */
2728 0, /* tp_as_number */
2729 0, /* tp_as_sequence */
2730 0, /* tp_as_mapping */
2731 0, /* tp_hash */
2732 0, /* tp_call */
2733 0, /* tp_str */
2734 0, /* tp_getattro */
2735 0, /* tp_setattro */
2736 0, /* tp_as_buffer */
2737 Py_TPFLAGS_DEFAULT, /* tp_flags */
2738 0, /* tp_doc */
2739 0, /* tp_traverse */
2740 0, /* tp_clear */
2741 0, /* tp_richcompare */
2742 0, /* tp_weaklistoffset */
2743 0, /* tp_iter */
2744 0, /* tp_iternext */
2745 scanner_methods, /* tp_methods */
2746 scanner_members, /* tp_members */
2747 0, /* tp_getset */
2748};
2749
Guido van Rossumb700df92000-03-31 14:59:30 +00002750static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002751 _SRE_COMPILE_METHODDEF
2752 _SRE_GETCODESIZE_METHODDEF
Serhiy Storchaka7186cc22017-05-05 10:42:46 +03002753 _SRE_ASCII_TOLOWER_METHODDEF
2754 _SRE_UNICODE_TOLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002755 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002756};
2757
Martin v. Löwis1a214512008-06-11 05:26:20 +00002758static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002759 PyModuleDef_HEAD_INIT,
2760 "_" SRE_MODULE,
2761 NULL,
2762 -1,
2763 _functions,
2764 NULL,
2765 NULL,
2766 NULL,
2767 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002768};
2769
2770PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002771{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002772 PyObject* m;
2773 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002774 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002775
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002776 /* Patch object types */
2777 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2778 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002779 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002780
Martin v. Löwis1a214512008-06-11 05:26:20 +00002781 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002782 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002783 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002784 d = PyModule_GetDict(m);
2785
Christian Heimes217cfd12007-12-02 14:31:20 +00002786 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002787 if (x) {
2788 PyDict_SetItemString(d, "MAGIC", x);
2789 Py_DECREF(x);
2790 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002791
Christian Heimes217cfd12007-12-02 14:31:20 +00002792 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002793 if (x) {
2794 PyDict_SetItemString(d, "CODESIZE", x);
2795 Py_DECREF(x);
2796 }
2797
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002798 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2799 if (x) {
2800 PyDict_SetItemString(d, "MAXREPEAT", x);
2801 Py_DECREF(x);
2802 }
2803
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002804 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2805 if (x) {
2806 PyDict_SetItemString(d, "MAXGROUPS", x);
2807 Py_DECREF(x);
2808 }
2809
Neal Norwitzfe537132007-08-26 03:55:15 +00002810 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002811 if (x) {
2812 PyDict_SetItemString(d, "copyright", x);
2813 Py_DECREF(x);
2814 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002815 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002816}
2817
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002818/* vim:ts=4:sw=4:et
2819*/