blob: 6873f1db438d4113f2dc08536b1632d7d0d91864 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
/* engine version / copyright banner */
static const char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
/* number of bits in one SRE_CODE unit (8 bits per byte assumed) */
#define SRE_CODE_BITS (sizeof(SRE_CODE) * 8)
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
/* Name of this module without its leading underscore; a build may
   predefine SRE_MODULE to override the default. */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

#define SRE_PY_MODULE "re"

/* TRACE() output is only compiled in when VERBOSE is defined; it is
   explicitly switched off here. */
#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063
/* LOCAL(type): storage class / calling convention used for file-local
   helper functions */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif

/* engine status codes (all negative) */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */

/* TRACE((fmt, ...)) prints through printf when VERBOSE is defined and
   expands to nothing otherwise; note the doubled parentheses */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
85
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000086/* -------------------------------------------------------------------- */
87/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000088
/* ASCII-only character predicates: code points >= 128 never match the
   Py_IS* based tests */
#define SRE_IS_DIGIT(ch) ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_ALNUM(ch) ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch) ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +000099
/* ASCII lower-casing: code points >= 128 are returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
104
/* ASCII upper-casing: code points >= 128 are returned unchanged */
static unsigned int sre_upper(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOUPPER(ch);
}
109
/* locale-specific character predicates */
/* (c & ~N) != 0 is the same as c > N for any unsigned c; testing the
 * bits instead of comparing avoids warnings when c's type cannot hold
 * values above N */
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
115
/* Lower-case through the C library's tolower(); values of 256 and above
   are passed through untouched. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower(ch);
}
120
/* Upper-case through the C library's toupper(); values of 256 and above
   are passed through untouched. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper(ch);
}
125
/* unicode-specific character predicates: thin aliases for the
   Py_UNICODE_* classification macros; "word" additionally accepts '_' */

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000133
/* Unicode lower-casing, delegated to Py_UNICODE_TOLOWER */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
138
/* Unicode upper-casing, delegated to Py_UNICODE_TOUPPER */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
143
Guido van Rossumb700df92000-03-31 14:59:30 +0000144LOCAL(int)
145sre_category(SRE_CODE category, unsigned int ch)
146{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000147 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000148
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000149 case SRE_CATEGORY_DIGIT:
150 return SRE_IS_DIGIT(ch);
151 case SRE_CATEGORY_NOT_DIGIT:
152 return !SRE_IS_DIGIT(ch);
153 case SRE_CATEGORY_SPACE:
154 return SRE_IS_SPACE(ch);
155 case SRE_CATEGORY_NOT_SPACE:
156 return !SRE_IS_SPACE(ch);
157 case SRE_CATEGORY_WORD:
158 return SRE_IS_WORD(ch);
159 case SRE_CATEGORY_NOT_WORD:
160 return !SRE_IS_WORD(ch);
161 case SRE_CATEGORY_LINEBREAK:
162 return SRE_IS_LINEBREAK(ch);
163 case SRE_CATEGORY_NOT_LINEBREAK:
164 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000165
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000166 case SRE_CATEGORY_LOC_WORD:
167 return SRE_LOC_IS_WORD(ch);
168 case SRE_CATEGORY_LOC_NOT_WORD:
169 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000170
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000171 case SRE_CATEGORY_UNI_DIGIT:
172 return SRE_UNI_IS_DIGIT(ch);
173 case SRE_CATEGORY_UNI_NOT_DIGIT:
174 return !SRE_UNI_IS_DIGIT(ch);
175 case SRE_CATEGORY_UNI_SPACE:
176 return SRE_UNI_IS_SPACE(ch);
177 case SRE_CATEGORY_UNI_NOT_SPACE:
178 return !SRE_UNI_IS_SPACE(ch);
179 case SRE_CATEGORY_UNI_WORD:
180 return SRE_UNI_IS_WORD(ch);
181 case SRE_CATEGORY_UNI_NOT_WORD:
182 return !SRE_UNI_IS_WORD(ch);
183 case SRE_CATEGORY_UNI_LINEBREAK:
184 return SRE_UNI_IS_LINEBREAK(ch);
185 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
186 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000187 }
188 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000189}
190
191/* helpers */
192
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000193static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000194data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000195{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000196 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000197 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000198 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000199 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000200 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000201}
202
203static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000204data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000205{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000206 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000207 minsize = state->data_stack_base+size;
208 cursize = state->data_stack_size;
209 if (cursize < minsize) {
210 void* stack;
211 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300212 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000213 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000214 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000215 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216 return SRE_ERROR_MEMORY;
217 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000218 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000219 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000220 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000221 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000222}
223
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000224/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000225
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300226#define SRE_CHAR Py_UCS1
227#define SIZEOF_SRE_CHAR 1
228#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300229#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000230
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300231/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000232
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300233#define SRE_CHAR Py_UCS2
234#define SIZEOF_SRE_CHAR 2
235#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300236#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000237
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300238/* generate 32-bit unicode version */
239
240#define SRE_CHAR Py_UCS4
241#define SIZEOF_SRE_CHAR 4
242#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300243#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000244
245/* -------------------------------------------------------------------- */
246/* factories and destructors */
247
248/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100249static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300250static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000251
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300252
/*[clinic input]
module _sre
class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
class _sre.SRE_Match "MatchObject *" "&Match_Type"
class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
260
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700261static PyTypeObject Pattern_Type;
262static PyTypeObject Match_Type;
263static PyTypeObject Scanner_Type;
264
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300265/*[clinic input]
266_sre.getcodesize -> int
267[clinic start generated code]*/
268
269static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300270_sre_getcodesize_impl(PyObject *module)
271/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000272{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300273 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000274}
275
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300276/*[clinic input]
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300277_sre.ascii_iscased -> bool
278
279 character: int
280 /
281
282[clinic start generated code]*/
283
284static int
285_sre_ascii_iscased_impl(PyObject *module, int character)
286/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
287{
288 unsigned int ch = (unsigned int)character;
289 return ch != sre_lower(ch) || ch != sre_upper(ch);
290}
291
292/*[clinic input]
293_sre.unicode_iscased -> bool
294
295 character: int
296 /
297
298[clinic start generated code]*/
299
300static int
301_sre_unicode_iscased_impl(PyObject *module, int character)
302/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
303{
304 unsigned int ch = (unsigned int)character;
305 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
306}
307
308/*[clinic input]
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300309_sre.ascii_tolower -> int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300310
311 character: int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300312 /
313
314[clinic start generated code]*/
315
316static int
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300317_sre_ascii_tolower_impl(PyObject *module, int character)
318/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000319{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300320 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000321}
322
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300323/*[clinic input]
324_sre.unicode_tolower -> int
325
326 character: int
327 /
328
329[clinic start generated code]*/
330
331static int
332_sre_unicode_tolower_impl(PyObject *module, int character)
333/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
334{
335 return sre_lower_unicode(character);
336}
337
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000338LOCAL(void)
339state_reset(SRE_STATE* state)
340{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000341 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000342 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000343
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000344 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000345 state->lastindex = -1;
346
347 state->repeat = NULL;
348
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000349 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000350}
351
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000352static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200353getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300354 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600355 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000356{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000357 /* given a python object, return a data pointer, a length (in
358 characters), and a character size. return NULL if the object
359 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000360
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000361 /* Unicode objects do not support the buffer API. So, get the data
362 directly instead. */
363 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200364 if (PyUnicode_READY(string) == -1)
365 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200366 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200367 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300368 *p_isbytes = 0;
369 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000370 }
371
Victor Stinner0058b862011-09-29 03:27:47 +0200372 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300373 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200374 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300375 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000376 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000377
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300378 *p_length = view->len;
379 *p_charsize = 1;
380 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000381
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 if (view->buf == NULL) {
383 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
384 PyBuffer_Release(view);
385 view->buf = NULL;
386 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000387 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300388 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000389}
390
391LOCAL(PyObject*)
392state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000393 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000394{
395 /* prepare state object */
396
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000397 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300398 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000399 void* ptr;
400
401 memset(state, 0, sizeof(SRE_STATE));
402
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300403 state->mark = PyMem_New(void *, pattern->groups * 2);
404 if (!state->mark) {
405 PyErr_NoMemory();
406 goto err;
407 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000408 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000409 state->lastindex = -1;
410
Benjamin Petersone48944b2012-03-07 14:50:25 -0600411 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300412 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000413 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600414 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000415
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300416 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600417 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200418 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600419 goto err;
420 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300421 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600422 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200423 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600424 goto err;
425 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000426
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000427 /* adjust boundaries */
428 if (start < 0)
429 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000430 else if (start > length)
431 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000432
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000433 if (end < 0)
434 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000435 else if (end > length)
436 end = length;
437
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300438 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000439 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000440
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000441 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000442
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000443 state->start = (void*) ((char*) ptr + start * state->charsize);
444 state->end = (void*) ((char*) ptr + end * state->charsize);
445
446 Py_INCREF(string);
447 state->string = string;
448 state->pos = start;
449 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000450
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200451 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000452 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200453 state->upper = sre_upper_locale;
454 }
455 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000456 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200457 state->upper = sre_upper_unicode;
458 }
459 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000460 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200461 state->upper = sre_upper;
462 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000463
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000464 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600465 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300466 PyMem_Del(state->mark);
467 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600468 if (state->buffer.buf)
469 PyBuffer_Release(&state->buffer);
470 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000471}
472
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000473LOCAL(void)
474state_fini(SRE_STATE* state)
475{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600476 if (state->buffer.buf)
477 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000478 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000479 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300480 PyMem_Del(state->mark);
481 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000482}
483
/* Convert a pointer into the subject (`member`) to a character index:
   the byte distance from (state)->beginning divided by (state)->charsize */
#define STATE_OFFSET(state, member) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
487
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000488LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300489getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300490 PyObject* string, Py_ssize_t start, Py_ssize_t end)
491{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300492 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300493 if (PyBytes_CheckExact(string) &&
494 start == 0 && end == PyBytes_GET_SIZE(string)) {
495 Py_INCREF(string);
496 return string;
497 }
498 return PyBytes_FromStringAndSize(
499 (const char *)ptr + start, end - start);
500 }
501 else {
502 return PyUnicode_Substring(string, start, end);
503 }
504}
505
506LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000507state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000508{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000509 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000510
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000511 index = (index - 1) * 2;
512
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000513 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000514 if (empty)
515 /* want empty string */
516 i = j = 0;
517 else {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200518 Py_RETURN_NONE;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000519 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000520 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000521 i = STATE_OFFSET(state, state->mark[index]);
522 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000523 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000524
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300525 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000526}
527
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000528static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100529pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000530{
531 switch (status) {
532 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400533 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000534 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400535 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000536 "maximum recursion limit exceeded"
537 );
538 break;
539 case SRE_ERROR_MEMORY:
540 PyErr_NoMemory();
541 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000542 case SRE_ERROR_INTERRUPTED:
543 /* An exception has already been raised, so let it fly */
544 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000545 default:
546 /* other error codes indicate compiler/engine bugs */
547 PyErr_SetString(
548 PyExc_RuntimeError,
549 "internal error in regular expression engine"
550 );
551 }
552}
553
Guido van Rossumb700df92000-03-31 14:59:30 +0000554static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000555pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000556{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000557 if (self->weakreflist != NULL)
558 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000559 Py_XDECREF(self->pattern);
560 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000561 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000562 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000563}
564
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300565LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300566sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300567{
568 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300569 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300570 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300571 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300572 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300573 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300574}
575
576LOCAL(Py_ssize_t)
577sre_search(SRE_STATE* state, SRE_CODE* pattern)
578{
579 if (state->charsize == 1)
580 return sre_ucs1_search(state, pattern);
581 if (state->charsize == 2)
582 return sre_ucs2_search(state, pattern);
583 assert(state->charsize == 4);
584 return sre_ucs4_search(state, pattern);
585}
586
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300587/*[clinic input]
588_sre.SRE_Pattern.match
589
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200590 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300591 pos: Py_ssize_t = 0
592 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300593
594Matches zero or more characters at the beginning of the string.
595[clinic start generated code]*/
596
Larry Hastings16c51912014-01-07 11:53:01 -0800597static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300598_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200599 Py_ssize_t pos, Py_ssize_t endpos)
600/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800601{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000602 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100603 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300604 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300606 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000607 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000608
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000609 state.ptr = state.start;
610
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000611 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
612
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300613 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000614
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000615 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300616 if (PyErr_Occurred()) {
617 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000618 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300619 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000620
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300621 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000622 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300623 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000624}
625
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300626/*[clinic input]
627_sre.SRE_Pattern.fullmatch
628
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200629 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300630 pos: Py_ssize_t = 0
631 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300632
633Matches against all of the string
634[clinic start generated code]*/
635
636static PyObject *
637_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200638 Py_ssize_t pos, Py_ssize_t endpos)
639/*[clinic end generated code: output=5833c47782a35f4a input=a6f640614aaefceb]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200640{
641 SRE_STATE state;
642 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300643 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200644
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300645 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200646 return NULL;
647
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200648 state.ptr = state.start;
649
650 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
651
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300652 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200653
654 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300655 if (PyErr_Occurred()) {
656 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200657 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300658 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200659
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300660 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200661 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300662 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200663}
664
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300665/*[clinic input]
666_sre.SRE_Pattern.search
667
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200668 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300669 pos: Py_ssize_t = 0
670 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300671
672Scan through string looking for a match, and return a corresponding match object instance.
673
674Return None if no position in the string matches.
675[clinic start generated code]*/
676
677static PyObject *
678_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200679 Py_ssize_t pos, Py_ssize_t endpos)
680/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000681{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000682 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100683 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300684 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000685
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300686 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000687 return NULL;
688
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000689 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
690
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300691 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000692
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000693 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
694
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300695 if (PyErr_Occurred()) {
696 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000697 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300698 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000699
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300700 match = pattern_new_match(self, &state, status);
701 state_fini(&state);
702 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000703}
704
705static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200706call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000707{
708 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000709 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000710 PyObject* func;
711 PyObject* result;
712
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000713 if (!args)
714 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000715 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000716 if (!name)
717 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000718 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000719 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000720 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000722 func = PyObject_GetAttrString(mod, function);
723 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000724 if (!func)
725 return NULL;
726 result = PyObject_CallObject(func, args);
727 Py_DECREF(func);
728 Py_DECREF(args);
729 return result;
730}
731
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300732/*[clinic input]
733_sre.SRE_Pattern.findall
734
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200735 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300736 pos: Py_ssize_t = 0
737 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300738
739Return a list of all non-overlapping matches of pattern in string.
740[clinic start generated code]*/
741
742static PyObject *
743_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200744 Py_ssize_t pos, Py_ssize_t endpos)
745/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000746{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000747 SRE_STATE state;
748 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100749 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000750 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000751
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300752 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000753 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000754
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000755 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000756 if (!list) {
757 state_fini(&state);
758 return NULL;
759 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000760
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000761 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000762
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000763 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000764
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000765 state_reset(&state);
766
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000767 state.ptr = state.start;
768
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300769 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300770 if (PyErr_Occurred())
771 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000772
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000773 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000774 if (status == 0)
775 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000776 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000777 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000778 }
Tim Peters3d563502006-01-21 02:47:53 +0000779
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000780 /* don't bother to build a match object */
781 switch (self->groups) {
782 case 0:
783 b = STATE_OFFSET(&state, state.start);
784 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300785 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300786 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000787 if (!item)
788 goto error;
789 break;
790 case 1:
791 item = state_getslice(&state, 1, string, 1);
792 if (!item)
793 goto error;
794 break;
795 default:
796 item = PyTuple_New(self->groups);
797 if (!item)
798 goto error;
799 for (i = 0; i < self->groups; i++) {
800 PyObject* o = state_getslice(&state, i+1, string, 1);
801 if (!o) {
802 Py_DECREF(item);
803 goto error;
804 }
805 PyTuple_SET_ITEM(item, i, o);
806 }
807 break;
808 }
809
810 status = PyList_Append(list, item);
811 Py_DECREF(item);
812 if (status < 0)
813 goto error;
814
815 if (state.ptr == state.start)
816 state.start = (void*) ((char*) state.ptr + state.charsize);
817 else
818 state.start = state.ptr;
819
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000820 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000821
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000822 state_fini(&state);
823 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000824
825error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000826 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000827 state_fini(&state);
828 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000829
Guido van Rossumb700df92000-03-31 14:59:30 +0000830}
831
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300832/*[clinic input]
833_sre.SRE_Pattern.finditer
834
835 string: object
836 pos: Py_ssize_t = 0
837 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
838
839Return an iterator over all non-overlapping matches for the RE pattern in string.
840
841For each match, the iterator returns a match object.
842[clinic start generated code]*/
843
844static PyObject *
845_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
846 Py_ssize_t pos, Py_ssize_t endpos)
847/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000848{
849 PyObject* scanner;
850 PyObject* search;
851 PyObject* iterator;
852
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300853 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000854 if (!scanner)
855 return NULL;
856
857 search = PyObject_GetAttrString(scanner, "search");
858 Py_DECREF(scanner);
859 if (!search)
860 return NULL;
861
862 iterator = PyCallIter_New(search, Py_None);
863 Py_DECREF(search);
864
865 return iterator;
866}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000867
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300868/*[clinic input]
869_sre.SRE_Pattern.scanner
870
871 string: object
872 pos: Py_ssize_t = 0
873 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
874
875[clinic start generated code]*/
876
877static PyObject *
878_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
879 Py_ssize_t pos, Py_ssize_t endpos)
880/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
881{
882 return pattern_scanner(self, string, pos, endpos);
883}
884
885/*[clinic input]
886_sre.SRE_Pattern.split
887
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200888 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300889 maxsplit: Py_ssize_t = 0
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300890
891Split string by the occurrences of pattern.
892[clinic start generated code]*/
893
894static PyObject *
895_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200896 Py_ssize_t maxsplit)
897/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000898{
899 SRE_STATE state;
900 PyObject* list;
901 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100902 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000903 Py_ssize_t n;
904 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000905 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000906
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200907 assert(self->codesize != 0);
908 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
909 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
910 PyErr_SetString(PyExc_ValueError,
911 "split() requires a non-empty pattern match.");
912 return NULL;
913 }
914 if (PyErr_WarnEx(PyExc_FutureWarning,
915 "split() requires a non-empty pattern match.",
916 1) < 0)
917 return NULL;
918 }
919
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300920 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000921 return NULL;
922
923 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000924 if (!list) {
925 state_fini(&state);
926 return NULL;
927 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000928
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000929 n = 0;
930 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000931
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000932 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000933
934 state_reset(&state);
935
936 state.ptr = state.start;
937
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300938 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300939 if (PyErr_Occurred())
940 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000941
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000942 if (status <= 0) {
943 if (status == 0)
944 break;
945 pattern_error(status);
946 goto error;
947 }
Tim Peters3d563502006-01-21 02:47:53 +0000948
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000949 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300950 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000951 break;
952 /* skip one character */
953 state.start = (void*) ((char*) state.ptr + state.charsize);
954 continue;
955 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000956
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000957 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300958 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000959 string, STATE_OFFSET(&state, last),
960 STATE_OFFSET(&state, state.start)
961 );
962 if (!item)
963 goto error;
964 status = PyList_Append(list, item);
965 Py_DECREF(item);
966 if (status < 0)
967 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000968
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000969 /* add groups (if any) */
970 for (i = 0; i < self->groups; i++) {
971 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000972 if (!item)
973 goto error;
974 status = PyList_Append(list, item);
975 Py_DECREF(item);
976 if (status < 0)
977 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000978 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000979
980 n = n + 1;
981
982 last = state.start = state.ptr;
983
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000984 }
985
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000986 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300987 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000988 string, STATE_OFFSET(&state, last), state.endpos
989 );
990 if (!item)
991 goto error;
992 status = PyList_Append(list, item);
993 Py_DECREF(item);
994 if (status < 0)
995 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000996
997 state_fini(&state);
998 return list;
999
1000error:
1001 Py_DECREF(list);
1002 state_fini(&state);
1003 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001004
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001005}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001006
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001007static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001008pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001009 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001010{
1011 SRE_STATE state;
1012 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001013 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001014 PyObject* item;
1015 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001016 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001017 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001018 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001019 Py_ssize_t n;
1020 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001021 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001022 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001023 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001024
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001025 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001026 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001027 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001028 Py_INCREF(filter);
1029 filter_is_callable = 1;
1030 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001031 /* if not callable, check if it's a literal string */
1032 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001033 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001034 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001035 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001036 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001037 if (charsize == 1)
1038 literal = memchr(ptr, '\\', n) == NULL;
1039 else
1040 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001041 } else {
1042 PyErr_Clear();
1043 literal = 0;
1044 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001045 if (view.buf)
1046 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001047 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001048 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001049 Py_INCREF(filter);
1050 filter_is_callable = 0;
1051 } else {
1052 /* not a literal; hand it over to the template compiler */
1053 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001054 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001055 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001056 );
1057 if (!filter)
1058 return NULL;
1059 filter_is_callable = PyCallable_Check(filter);
1060 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001061 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001062
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001063 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001064 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001065 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001066 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001067
1068 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001069 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001070 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001071 state_fini(&state);
1072 return NULL;
1073 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001074
1075 n = i = 0;
1076
1077 while (!count || n < count) {
1078
1079 state_reset(&state);
1080
1081 state.ptr = state.start;
1082
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001083 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001084 if (PyErr_Occurred())
1085 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001086
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001087 if (status <= 0) {
1088 if (status == 0)
1089 break;
1090 pattern_error(status);
1091 goto error;
1092 }
Tim Peters3d563502006-01-21 02:47:53 +00001093
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001094 b = STATE_OFFSET(&state, state.start);
1095 e = STATE_OFFSET(&state, state.ptr);
1096
1097 if (i < b) {
1098 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001099 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001100 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001101 if (!item)
1102 goto error;
1103 status = PyList_Append(list, item);
1104 Py_DECREF(item);
1105 if (status < 0)
1106 goto error;
1107
1108 } else if (i == b && i == e && n > 0)
1109 /* ignore empty match on latest position */
1110 goto next;
1111
1112 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001113 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001114 match = pattern_new_match(self, &state, 1);
1115 if (!match)
1116 goto error;
Victor Stinner7bfb42d2016-12-05 17:04:32 +01001117 item = PyObject_CallFunctionObjArgs(filter, match, NULL);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001118 Py_DECREF(match);
1119 if (!item)
1120 goto error;
1121 } else {
1122 /* filter is literal string */
1123 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001124 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001125 }
1126
1127 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001128 if (item != Py_None) {
1129 status = PyList_Append(list, item);
1130 Py_DECREF(item);
1131 if (status < 0)
1132 goto error;
1133 }
Tim Peters3d563502006-01-21 02:47:53 +00001134
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001135 i = e;
1136 n = n + 1;
1137
1138next:
1139 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001140 if (state.ptr == state.end)
1141 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001142 if (state.ptr == state.start)
1143 state.start = (void*) ((char*) state.ptr + state.charsize);
1144 else
1145 state.start = state.ptr;
1146
1147 }
1148
1149 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001150 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001151 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001152 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001153 if (!item)
1154 goto error;
1155 status = PyList_Append(list, item);
1156 Py_DECREF(item);
1157 if (status < 0)
1158 goto error;
1159 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001160
1161 state_fini(&state);
1162
Guido van Rossum4e173842001-12-07 04:25:10 +00001163 Py_DECREF(filter);
1164
Fredrik Lundhdac58492001-10-21 21:48:30 +00001165 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001166 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001167 if (!joiner) {
1168 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001169 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001170 }
1171 if (PyList_GET_SIZE(list) == 0) {
1172 Py_DECREF(list);
1173 item = joiner;
1174 }
1175 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001176 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001177 item = _PyBytes_Join(joiner, list);
1178 else
1179 item = PyUnicode_Join(joiner, list);
1180 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001181 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001182 if (!item)
1183 return NULL;
1184 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001185
1186 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001187 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001188
1189 return item;
1190
1191error:
1192 Py_DECREF(list);
1193 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001194 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001195 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001196
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001197}
1198
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001199/*[clinic input]
1200_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001201
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001202 repl: object
1203 string: object
1204 count: Py_ssize_t = 0
1205
1206Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1207[clinic start generated code]*/
1208
1209static PyObject *
1210_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1211 PyObject *string, Py_ssize_t count)
1212/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1213{
1214 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001215}
1216
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001217/*[clinic input]
1218_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001219
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001220 repl: object
1221 string: object
1222 count: Py_ssize_t = 0
1223
1224Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1225[clinic start generated code]*/
1226
1227static PyObject *
1228_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1229 PyObject *string, Py_ssize_t count)
1230/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1231{
1232 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001233}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001234
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001235/*[clinic input]
1236_sre.SRE_Pattern.__copy__
1237
1238[clinic start generated code]*/
1239
1240static PyObject *
1241_sre_SRE_Pattern___copy___impl(PatternObject *self)
1242/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001243{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001244 Py_INCREF(self);
1245 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001246}
1247
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001248/*[clinic input]
1249_sre.SRE_Pattern.__deepcopy__
1250
1251 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001252 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001253
1254[clinic start generated code]*/
1255
1256static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001257_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1258/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001259{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001260 Py_INCREF(self);
1261 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001262}
1263
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001264static PyObject *
1265pattern_repr(PatternObject *obj)
1266{
1267 static const struct {
1268 const char *name;
1269 int value;
1270 } flag_names[] = {
1271 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1272 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1273 {"re.LOCALE", SRE_FLAG_LOCALE},
1274 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1275 {"re.DOTALL", SRE_FLAG_DOTALL},
1276 {"re.UNICODE", SRE_FLAG_UNICODE},
1277 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1278 {"re.DEBUG", SRE_FLAG_DEBUG},
1279 {"re.ASCII", SRE_FLAG_ASCII},
1280 };
1281 PyObject *result = NULL;
1282 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001283 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001284 int flags = obj->flags;
1285
1286 /* Omit re.UNICODE for valid string patterns. */
1287 if (obj->isbytes == 0 &&
1288 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1289 SRE_FLAG_UNICODE)
1290 flags &= ~SRE_FLAG_UNICODE;
1291
1292 flag_items = PyList_New(0);
1293 if (!flag_items)
1294 return NULL;
1295
1296 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1297 if (flags & flag_names[i].value) {
1298 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1299 if (!item)
1300 goto done;
1301
1302 if (PyList_Append(flag_items, item) < 0) {
1303 Py_DECREF(item);
1304 goto done;
1305 }
1306 Py_DECREF(item);
1307 flags &= ~flag_names[i].value;
1308 }
1309 }
1310 if (flags) {
1311 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1312 if (!item)
1313 goto done;
1314
1315 if (PyList_Append(flag_items, item) < 0) {
1316 Py_DECREF(item);
1317 goto done;
1318 }
1319 Py_DECREF(item);
1320 }
1321
1322 if (PyList_Size(flag_items) > 0) {
1323 PyObject *flags_result;
1324 PyObject *sep = PyUnicode_FromString("|");
1325 if (!sep)
1326 goto done;
1327 flags_result = PyUnicode_Join(sep, flag_items);
1328 Py_DECREF(sep);
1329 if (!flags_result)
1330 goto done;
1331 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1332 obj->pattern, flags_result);
1333 Py_DECREF(flags_result);
1334 }
1335 else {
1336 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1337 }
1338
1339done:
1340 Py_DECREF(flag_items);
1341 return result;
1342}
1343
Raymond Hettinger94478742004-09-24 04:31:19 +00001344PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1345
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001346/* PatternObject's 'groupindex' method. */
1347static PyObject *
1348pattern_groupindex(PatternObject *self)
1349{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001350 if (self->groupindex == NULL)
1351 return PyDict_New();
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001352 return PyDictProxy_New(self->groupindex);
1353}
1354
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001355static int _validate(PatternObject *self); /* Forward */
1356
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001357/*[clinic input]
1358_sre.compile
1359
1360 pattern: object
1361 flags: int
1362 code: object(subclass_of='&PyList_Type')
1363 groups: Py_ssize_t
Victor Stinner726a57d2016-11-22 23:04:39 +01001364 groupindex: object(subclass_of='&PyDict_Type')
1365 indexgroup: object(subclass_of='&PyTuple_Type')
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001366
1367[clinic start generated code]*/
1368
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001369static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001370_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001371 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1372 PyObject *indexgroup)
Victor Stinner726a57d2016-11-22 23:04:39 +01001373/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001374{
1375 /* "compile" pattern descriptor to pattern object */
1376
1377 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001378 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001379
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001380 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001381 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001382 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1383 if (!self)
1384 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001385 self->weakreflist = NULL;
1386 self->pattern = NULL;
1387 self->groupindex = NULL;
1388 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001389
1390 self->codesize = n;
1391
1392 for (i = 0; i < n; i++) {
1393 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001394 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001395 self->code[i] = (SRE_CODE) value;
1396 if ((unsigned long) self->code[i] != value) {
1397 PyErr_SetString(PyExc_OverflowError,
1398 "regular expression code size limit exceeded");
1399 break;
1400 }
1401 }
1402
1403 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001404 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001405 return NULL;
1406 }
1407
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001408 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001409 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001410 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001411 else {
1412 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001413 int charsize;
1414 Py_buffer view;
1415 view.buf = NULL;
1416 if (!getstring(pattern, &p_length, &self->isbytes,
1417 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001418 Py_DECREF(self);
1419 return NULL;
1420 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001421 if (view.buf)
1422 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001423 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001424
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001425 Py_INCREF(pattern);
1426 self->pattern = pattern;
1427
1428 self->flags = flags;
1429
1430 self->groups = groups;
1431
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001432 if (PyDict_GET_SIZE(groupindex) > 0) {
1433 Py_INCREF(groupindex);
1434 self->groupindex = groupindex;
1435 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1436 Py_INCREF(indexgroup);
1437 self->indexgroup = indexgroup;
1438 }
1439 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001440
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001441 if (!_validate(self)) {
1442 Py_DECREF(self);
1443 return NULL;
1444 }
1445
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001446 return (PyObject*) self;
1447}
1448
Guido van Rossumb700df92000-03-31 14:59:30 +00001449/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001450/* Code validation */
1451
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1473
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a parenthesized printf
   argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the function that expands the macro */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros expect the expanding function to have `code` and `end`
   pointers in scope plus a local named `op`, `arg` or `skip` respectively.
   Each one FAILs if no word is left before `end`, and advances `code` by
   one word on success. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Like GET_ARG, but additionally FAILs when (skip - adj) is larger than the
   number of words between the skip word and `end`.  `adj` is parenthesized
   so that an expression argument cannot change the precedence of the
   subtraction. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001514
1515static int
1516_validate_charset(SRE_CODE *code, SRE_CODE *end)
1517{
1518 /* Some variables are manipulated by the macros above */
1519 SRE_CODE op;
1520 SRE_CODE arg;
1521 SRE_CODE offset;
1522 int i;
1523
1524 while (code < end) {
1525 GET_OP;
1526 switch (op) {
1527
1528 case SRE_OP_NEGATE:
1529 break;
1530
1531 case SRE_OP_LITERAL:
1532 GET_ARG;
1533 break;
1534
1535 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001536 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001537 GET_ARG;
1538 GET_ARG;
1539 break;
1540
1541 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001542 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001543 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001544 FAIL;
1545 code += offset;
1546 break;
1547
1548 case SRE_OP_BIGCHARSET:
1549 GET_ARG; /* Number of blocks */
1550 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001551 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001552 FAIL;
1553 /* Make sure that each byte points to a valid block */
1554 for (i = 0; i < 256; i++) {
1555 if (((unsigned char *)code)[i] >= arg)
1556 FAIL;
1557 }
1558 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001559 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001560 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001561 FAIL;
1562 code += offset;
1563 break;
1564
1565 case SRE_OP_CATEGORY:
1566 GET_ARG;
1567 switch (arg) {
1568 case SRE_CATEGORY_DIGIT:
1569 case SRE_CATEGORY_NOT_DIGIT:
1570 case SRE_CATEGORY_SPACE:
1571 case SRE_CATEGORY_NOT_SPACE:
1572 case SRE_CATEGORY_WORD:
1573 case SRE_CATEGORY_NOT_WORD:
1574 case SRE_CATEGORY_LINEBREAK:
1575 case SRE_CATEGORY_NOT_LINEBREAK:
1576 case SRE_CATEGORY_LOC_WORD:
1577 case SRE_CATEGORY_LOC_NOT_WORD:
1578 case SRE_CATEGORY_UNI_DIGIT:
1579 case SRE_CATEGORY_UNI_NOT_DIGIT:
1580 case SRE_CATEGORY_UNI_SPACE:
1581 case SRE_CATEGORY_UNI_NOT_SPACE:
1582 case SRE_CATEGORY_UNI_WORD:
1583 case SRE_CATEGORY_UNI_NOT_WORD:
1584 case SRE_CATEGORY_UNI_LINEBREAK:
1585 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1586 break;
1587 default:
1588 FAIL;
1589 }
1590 break;
1591
1592 default:
1593 FAIL;
1594
1595 }
1596 }
1597
1598 return 1;
1599}
1600
1601static int
1602_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1603{
1604 /* Some variables are manipulated by the macros above */
1605 SRE_CODE op;
1606 SRE_CODE arg;
1607 SRE_CODE skip;
1608
1609 VTRACE(("code=%p, end=%p\n", code, end));
1610
1611 if (code > end)
1612 FAIL;
1613
1614 while (code < end) {
1615 GET_OP;
1616 switch (op) {
1617
1618 case SRE_OP_MARK:
1619 /* We don't check whether marks are properly nested; the
1620 sre_match() code is robust even if they don't, and the worst
1621 you can get is nonsensical match results. */
1622 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001623 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001624 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1625 FAIL;
1626 }
1627 break;
1628
1629 case SRE_OP_LITERAL:
1630 case SRE_OP_NOT_LITERAL:
1631 case SRE_OP_LITERAL_IGNORE:
1632 case SRE_OP_NOT_LITERAL_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001633 case SRE_OP_LITERAL_LOC_IGNORE:
1634 case SRE_OP_NOT_LITERAL_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001635 GET_ARG;
1636 /* The arg is just a character, nothing to check */
1637 break;
1638
1639 case SRE_OP_SUCCESS:
1640 case SRE_OP_FAILURE:
1641 /* Nothing to check; these normally end the matching process */
1642 break;
1643
1644 case SRE_OP_AT:
1645 GET_ARG;
1646 switch (arg) {
1647 case SRE_AT_BEGINNING:
1648 case SRE_AT_BEGINNING_STRING:
1649 case SRE_AT_BEGINNING_LINE:
1650 case SRE_AT_END:
1651 case SRE_AT_END_LINE:
1652 case SRE_AT_END_STRING:
1653 case SRE_AT_BOUNDARY:
1654 case SRE_AT_NON_BOUNDARY:
1655 case SRE_AT_LOC_BOUNDARY:
1656 case SRE_AT_LOC_NON_BOUNDARY:
1657 case SRE_AT_UNI_BOUNDARY:
1658 case SRE_AT_UNI_NON_BOUNDARY:
1659 break;
1660 default:
1661 FAIL;
1662 }
1663 break;
1664
1665 case SRE_OP_ANY:
1666 case SRE_OP_ANY_ALL:
1667 /* These have no operands */
1668 break;
1669
1670 case SRE_OP_IN:
1671 case SRE_OP_IN_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001672 case SRE_OP_IN_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001673 GET_SKIP;
1674 /* Stop 1 before the end; we check the FAILURE below */
1675 if (!_validate_charset(code, code+skip-2))
1676 FAIL;
1677 if (code[skip-2] != SRE_OP_FAILURE)
1678 FAIL;
1679 code += skip-1;
1680 break;
1681
1682 case SRE_OP_INFO:
1683 {
1684 /* A minimal info field is
1685 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1686 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1687 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001688 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001689 SRE_CODE *newcode;
1690 GET_SKIP;
1691 newcode = code+skip-1;
1692 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001693 GET_ARG;
1694 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001695 /* Check that only valid flags are present */
1696 if ((flags & ~(SRE_INFO_PREFIX |
1697 SRE_INFO_LITERAL |
1698 SRE_INFO_CHARSET)) != 0)
1699 FAIL;
1700 /* PREFIX and CHARSET are mutually exclusive */
1701 if ((flags & SRE_INFO_PREFIX) &&
1702 (flags & SRE_INFO_CHARSET))
1703 FAIL;
1704 /* LITERAL implies PREFIX */
1705 if ((flags & SRE_INFO_LITERAL) &&
1706 !(flags & SRE_INFO_PREFIX))
1707 FAIL;
1708 /* Validate the prefix */
1709 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001710 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001711 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001712 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001713 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001714 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001715 FAIL;
1716 code += prefix_len;
1717 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001718 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001719 FAIL;
1720 /* Each overlap value should be < prefix_len */
1721 for (i = 0; i < prefix_len; i++) {
1722 if (code[i] >= prefix_len)
1723 FAIL;
1724 }
1725 code += prefix_len;
1726 }
1727 /* Validate the charset */
1728 if (flags & SRE_INFO_CHARSET) {
1729 if (!_validate_charset(code, newcode-1))
1730 FAIL;
1731 if (newcode[-1] != SRE_OP_FAILURE)
1732 FAIL;
1733 code = newcode;
1734 }
1735 else if (code != newcode) {
1736 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1737 FAIL;
1738 }
1739 }
1740 break;
1741
1742 case SRE_OP_BRANCH:
1743 {
1744 SRE_CODE *target = NULL;
1745 for (;;) {
1746 GET_SKIP;
1747 if (skip == 0)
1748 break;
1749 /* Stop 2 before the end; we check the JUMP below */
1750 if (!_validate_inner(code, code+skip-3, groups))
1751 FAIL;
1752 code += skip-3;
1753 /* Check that it ends with a JUMP, and that each JUMP
1754 has the same target */
1755 GET_OP;
1756 if (op != SRE_OP_JUMP)
1757 FAIL;
1758 GET_SKIP;
1759 if (target == NULL)
1760 target = code+skip-1;
1761 else if (code+skip-1 != target)
1762 FAIL;
1763 }
1764 }
1765 break;
1766
1767 case SRE_OP_REPEAT_ONE:
1768 case SRE_OP_MIN_REPEAT_ONE:
1769 {
1770 SRE_CODE min, max;
1771 GET_SKIP;
1772 GET_ARG; min = arg;
1773 GET_ARG; max = arg;
1774 if (min > max)
1775 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001776 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001777 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001778 if (!_validate_inner(code, code+skip-4, groups))
1779 FAIL;
1780 code += skip-4;
1781 GET_OP;
1782 if (op != SRE_OP_SUCCESS)
1783 FAIL;
1784 }
1785 break;
1786
1787 case SRE_OP_REPEAT:
1788 {
1789 SRE_CODE min, max;
1790 GET_SKIP;
1791 GET_ARG; min = arg;
1792 GET_ARG; max = arg;
1793 if (min > max)
1794 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001795 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001796 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001797 if (!_validate_inner(code, code+skip-3, groups))
1798 FAIL;
1799 code += skip-3;
1800 GET_OP;
1801 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1802 FAIL;
1803 }
1804 break;
1805
1806 case SRE_OP_GROUPREF:
1807 case SRE_OP_GROUPREF_IGNORE:
1808 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001809 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001810 FAIL;
1811 break;
1812
1813 case SRE_OP_GROUPREF_EXISTS:
1814 /* The regex syntax for this is: '(?(group)then|else)', where
1815 'group' is either an integer group number or a group name,
1816 'then' and 'else' are sub-regexes, and 'else' is optional. */
1817 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001818 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001819 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001820 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001821 code--; /* The skip is relative to the first arg! */
1822 /* There are two possibilities here: if there is both a 'then'
1823 part and an 'else' part, the generated code looks like:
1824
1825 GROUPREF_EXISTS
1826 <group>
1827 <skipyes>
1828 ...then part...
1829 JUMP
1830 <skipno>
1831 (<skipyes> jumps here)
1832 ...else part...
1833 (<skipno> jumps here)
1834
1835 If there is only a 'then' part, it looks like:
1836
1837 GROUPREF_EXISTS
1838 <group>
1839 <skip>
1840 ...then part...
1841 (<skip> jumps here)
1842
1843 There is no direct way to decide which it is, and we don't want
1844 to allow arbitrary jumps anywhere in the code; so we just look
1845 for a JUMP opcode preceding our skip target.
1846 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001847 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001848 code[skip-3] == SRE_OP_JUMP)
1849 {
1850 VTRACE(("both then and else parts present\n"));
1851 if (!_validate_inner(code+1, code+skip-3, groups))
1852 FAIL;
1853 code += skip-2; /* Position after JUMP, at <skipno> */
1854 GET_SKIP;
1855 if (!_validate_inner(code, code+skip-1, groups))
1856 FAIL;
1857 code += skip-1;
1858 }
1859 else {
1860 VTRACE(("only a then part present\n"));
1861 if (!_validate_inner(code+1, code+skip-1, groups))
1862 FAIL;
1863 code += skip-1;
1864 }
1865 break;
1866
1867 case SRE_OP_ASSERT:
1868 case SRE_OP_ASSERT_NOT:
1869 GET_SKIP;
1870 GET_ARG; /* 0 for lookahead, width for lookbehind */
1871 code--; /* Back up over arg to simplify math below */
1872 if (arg & 0x80000000)
1873 FAIL; /* Width too large */
1874 /* Stop 1 before the end; we check the SUCCESS below */
1875 if (!_validate_inner(code+1, code+skip-2, groups))
1876 FAIL;
1877 code += skip-2;
1878 GET_OP;
1879 if (op != SRE_OP_SUCCESS)
1880 FAIL;
1881 break;
1882
1883 default:
1884 FAIL;
1885
1886 }
1887 }
1888
1889 VTRACE(("okay\n"));
1890 return 1;
1891}
1892
1893static int
1894_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1895{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001896 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1897 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001898 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001899 return _validate_inner(code, end-1, groups);
1900}
1901
1902static int
1903_validate(PatternObject *self)
1904{
1905 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1906 {
1907 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1908 return 0;
1909 }
1910 else
1911 VTRACE(("Success!\n"));
1912 return 1;
1913}
1914
1915/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001916/* match methods */
1917
1918static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001919match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001920{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001921 Py_XDECREF(self->regs);
1922 Py_XDECREF(self->string);
1923 Py_DECREF(self->pattern);
1924 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001925}
1926
1927static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001928match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001929{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001930 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001931 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001932 Py_buffer view;
1933 PyObject *result;
1934 void* ptr;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001935 Py_ssize_t i, j;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001936
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001937 if (index < 0 || index >= self->groups) {
1938 /* raise IndexError if we were given a bad group number */
1939 PyErr_SetString(
1940 PyExc_IndexError,
1941 "no such group"
1942 );
1943 return NULL;
1944 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001945
Fredrik Lundh6f013982000-07-03 18:44:21 +00001946 index *= 2;
1947
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001948 if (self->string == Py_None || self->mark[index] < 0) {
1949 /* return default value if the string or group is undefined */
1950 Py_INCREF(def);
1951 return def;
1952 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001953
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001954 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001955 if (ptr == NULL)
1956 return NULL;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001957
1958 i = self->mark[index];
1959 j = self->mark[index+1];
1960 i = Py_MIN(i, length);
1961 j = Py_MIN(j, length);
1962 result = getslice(isbytes, ptr, self->string, i, j);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001963 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001964 PyBuffer_Release(&view);
1965 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001966}
1967
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001968static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001969match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001970{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001971 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001972
Guido van Rossumddefaf32007-01-14 03:31:43 +00001973 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001974 /* Default value */
1975 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001976
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001977 if (PyIndex_Check(index)) {
1978 return PyNumber_AsSsize_t(index, NULL);
1979 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001980
Fredrik Lundh6f013982000-07-03 18:44:21 +00001981 i = -1;
1982
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001983 if (self->pattern->groupindex) {
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001984 index = PyDict_GetItem(self->pattern->groupindex, index);
1985 if (index && PyLong_Check(index)) {
1986 i = PyLong_AsSsize_t(index);
1987 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001988 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001989
1990 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001991}
1992
1993static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001994match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001995{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001996 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001997}
1998
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001999/*[clinic input]
2000_sre.SRE_Match.expand
2001
2002 template: object
2003
2004Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2005[clinic start generated code]*/
2006
2007static PyObject *
2008_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2009/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002010{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002011 /* delegate to Python code */
2012 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002013 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002014 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002015 );
2016}
2017
2018static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002019match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002020{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002021 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002022 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002023
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002024 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002025
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002026 switch (size) {
2027 case 0:
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002028 result = match_getslice(self, _PyLong_Zero, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002029 break;
2030 case 1:
2031 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2032 break;
2033 default:
2034 /* fetch multiple items */
2035 result = PyTuple_New(size);
2036 if (!result)
2037 return NULL;
2038 for (i = 0; i < size; i++) {
2039 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002040 self, PyTuple_GET_ITEM(args, i), Py_None
2041 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002042 if (!item) {
2043 Py_DECREF(result);
2044 return NULL;
2045 }
2046 PyTuple_SET_ITEM(result, i, item);
2047 }
2048 break;
2049 }
2050 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002051}
2052
Eric V. Smith605bdae2016-09-11 08:55:43 -04002053static PyObject*
2054match_getitem(MatchObject* self, PyObject* name)
2055{
2056 return match_getslice(self, name, Py_None);
2057}
2058
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002059/*[clinic input]
2060_sre.SRE_Match.groups
2061
2062 default: object = None
2063 Is used for groups that did not participate in the match.
2064
2065Return a tuple containing all the subgroups of the match, from 1.
2066[clinic start generated code]*/
2067
2068static PyObject *
2069_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2070/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002071{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002072 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002073 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002074
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002075 result = PyTuple_New(self->groups-1);
2076 if (!result)
2077 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002078
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002079 for (index = 1; index < self->groups; index++) {
2080 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002081 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002082 if (!item) {
2083 Py_DECREF(result);
2084 return NULL;
2085 }
2086 PyTuple_SET_ITEM(result, index-1, item);
2087 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002088
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002089 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002090}
2091
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002092/*[clinic input]
2093_sre.SRE_Match.groupdict
2094
2095 default: object = None
2096 Is used for groups that did not participate in the match.
2097
2098Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2099[clinic start generated code]*/
2100
2101static PyObject *
2102_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2103/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002104{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002105 PyObject *result;
2106 PyObject *key;
2107 PyObject *value;
2108 Py_ssize_t pos = 0;
2109 Py_hash_t hash;
Guido van Rossumb700df92000-03-31 14:59:30 +00002110
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002111 result = PyDict_New();
2112 if (!result || !self->pattern->groupindex)
2113 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002114
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002115 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002116 int status;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002117 Py_INCREF(key);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002118 value = match_getslice(self, key, default_value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002119 if (!value) {
2120 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002121 goto failed;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002122 }
2123 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002124 Py_DECREF(value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002125 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002126 if (status < 0)
2127 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002128 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002129
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002130 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002131
2132failed:
Fredrik Lundh770617b2001-01-14 15:06:11 +00002133 Py_DECREF(result);
2134 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002135}
2136
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002137/*[clinic input]
2138_sre.SRE_Match.start -> Py_ssize_t
2139
2140 group: object(c_default="NULL") = 0
2141 /
2142
2143Return index of the start of the substring matched by group.
2144[clinic start generated code]*/
2145
2146static Py_ssize_t
2147_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2148/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002149{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002150 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002151
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002152 if (index < 0 || index >= self->groups) {
2153 PyErr_SetString(
2154 PyExc_IndexError,
2155 "no such group"
2156 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002157 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002158 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002159
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002160 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002161 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002162}
2163
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002164/*[clinic input]
2165_sre.SRE_Match.end -> Py_ssize_t
2166
2167 group: object(c_default="NULL") = 0
2168 /
2169
2170Return index of the end of the substring matched by group.
2171[clinic start generated code]*/
2172
2173static Py_ssize_t
2174_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2175/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002176{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002177 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002178
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002179 if (index < 0 || index >= self->groups) {
2180 PyErr_SetString(
2181 PyExc_IndexError,
2182 "no such group"
2183 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002184 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002185 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002186
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002187 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002188 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002189}
2190
2191LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002192_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002193{
2194 PyObject* pair;
2195 PyObject* item;
2196
2197 pair = PyTuple_New(2);
2198 if (!pair)
2199 return NULL;
2200
Christian Heimes217cfd12007-12-02 14:31:20 +00002201 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002202 if (!item)
2203 goto error;
2204 PyTuple_SET_ITEM(pair, 0, item);
2205
Christian Heimes217cfd12007-12-02 14:31:20 +00002206 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207 if (!item)
2208 goto error;
2209 PyTuple_SET_ITEM(pair, 1, item);
2210
2211 return pair;
2212
2213 error:
2214 Py_DECREF(pair);
2215 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002216}
2217
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002218/*[clinic input]
2219_sre.SRE_Match.span
2220
2221 group: object(c_default="NULL") = 0
2222 /
2223
2224For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2225[clinic start generated code]*/
2226
2227static PyObject *
2228_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2229/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002230{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002231 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002232
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002233 if (index < 0 || index >= self->groups) {
2234 PyErr_SetString(
2235 PyExc_IndexError,
2236 "no such group"
2237 );
2238 return NULL;
2239 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002240
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002241 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002242 return _pair(self->mark[index*2], self->mark[index*2+1]);
2243}
2244
2245static PyObject*
2246match_regs(MatchObject* self)
2247{
2248 PyObject* regs;
2249 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002250 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002251
2252 regs = PyTuple_New(self->groups);
2253 if (!regs)
2254 return NULL;
2255
2256 for (index = 0; index < self->groups; index++) {
2257 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2258 if (!item) {
2259 Py_DECREF(regs);
2260 return NULL;
2261 }
2262 PyTuple_SET_ITEM(regs, index, item);
2263 }
2264
2265 Py_INCREF(regs);
2266 self->regs = regs;
2267
2268 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002269}
2270
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002271/*[clinic input]
2272_sre.SRE_Match.__copy__
2273
2274[clinic start generated code]*/
2275
2276static PyObject *
2277_sre_SRE_Match___copy___impl(MatchObject *self)
2278/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002279{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002280 Py_INCREF(self);
2281 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002282}
2283
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002284/*[clinic input]
2285_sre.SRE_Match.__deepcopy__
2286
2287 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002288 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002289
2290[clinic start generated code]*/
2291
2292static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002293_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2294/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002295{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002296 Py_INCREF(self);
2297 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002298}
2299
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002300PyDoc_STRVAR(match_doc,
2301"The result of re.match() and re.search().\n\
2302Match objects always have a boolean value of True.");
2303
/* Docstring for the "group" method; match_methods passes it as that
   entry's documentation.  NOTE(review): the continuation lines' leading
   whitespace is part of the string literal -- keep it intact when editing. */
2304PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002305"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002306 Return subgroup(s) of the match by indices or names.\n\
2307 For 0 returns the entire match.");
2308
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002309static PyObject *
2310match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002311{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002312 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002313 return PyLong_FromSsize_t(self->lastindex);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002314 Py_RETURN_NONE;
Guido van Rossumb700df92000-03-31 14:59:30 +00002315}
2316
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002317static PyObject *
2318match_lastgroup_get(MatchObject *self)
2319{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002320 if (self->pattern->indexgroup &&
2321 self->lastindex >= 0 &&
2322 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2323 {
2324 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2325 self->lastindex);
2326 Py_INCREF(result);
2327 return result;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002328 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002329 Py_RETURN_NONE;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002330}
2331
2332static PyObject *
2333match_regs_get(MatchObject *self)
2334{
2335 if (self->regs) {
2336 Py_INCREF(self->regs);
2337 return self->regs;
2338 } else
2339 return match_regs(self);
2340}
2341
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002342static PyObject *
2343match_repr(MatchObject *self)
2344{
2345 PyObject *result;
2346 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2347 if (group0 == NULL)
2348 return NULL;
2349 result = PyUnicode_FromFormat(
2350 "<%s object; span=(%d, %d), match=%.50R>",
2351 Py_TYPE(self)->tp_name,
2352 self->mark[0], self->mark[1], group0);
2353 Py_DECREF(group0);
2354 return result;
2355}
2356
2357
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002358static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002359pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002360{
2361 /* create match object (from state object) */
2362
2363 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002364 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002365 char* base;
2366 int n;
2367
2368 if (status > 0) {
2369
2370 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002371 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002372 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2373 2*(pattern->groups+1));
2374 if (!match)
2375 return NULL;
2376
2377 Py_INCREF(pattern);
2378 match->pattern = pattern;
2379
2380 Py_INCREF(state->string);
2381 match->string = state->string;
2382
2383 match->regs = NULL;
2384 match->groups = pattern->groups+1;
2385
2386 /* fill in group slices */
2387
2388 base = (char*) state->beginning;
2389 n = state->charsize;
2390
2391 match->mark[0] = ((char*) state->start - base) / n;
2392 match->mark[1] = ((char*) state->ptr - base) / n;
2393
2394 for (i = j = 0; i < pattern->groups; i++, j+=2)
2395 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2396 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2397 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2398 } else
2399 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2400
2401 match->pos = state->pos;
2402 match->endpos = state->endpos;
2403
2404 match->lastindex = state->lastindex;
2405
2406 return (PyObject*) match;
2407
2408 } else if (status == 0) {
2409
2410 /* no match */
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002411 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002412
2413 }
2414
2415 /* internal error */
2416 pattern_error(status);
2417 return NULL;
2418}
2419
2420
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002421/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002422/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002423
2424static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002425scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002426{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002427 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002428 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002429 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002430}
2431
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002432/*[clinic input]
2433_sre.SRE_Scanner.match
2434
2435[clinic start generated code]*/
2436
2437static PyObject *
2438_sre_SRE_Scanner_match_impl(ScannerObject *self)
2439/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002440{
2441 SRE_STATE* state = &self->state;
2442 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002443 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002444
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002445 if (state->start == NULL)
2446 Py_RETURN_NONE;
2447
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002448 state_reset(state);
2449
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002450 state->ptr = state->start;
2451
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002452 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002453 if (PyErr_Occurred())
2454 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002455
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002456 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002457 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002458
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002459 if (status == 0)
2460 state->start = NULL;
2461 else if (state->ptr != state->start)
2462 state->start = state->ptr;
2463 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002464 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002465 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002466 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002467
2468 return match;
2469}
2470
2471
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002472/*[clinic input]
2473_sre.SRE_Scanner.search
2474
2475[clinic start generated code]*/
2476
2477static PyObject *
2478_sre_SRE_Scanner_search_impl(ScannerObject *self)
2479/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002480{
2481 SRE_STATE* state = &self->state;
2482 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002483 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002484
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002485 if (state->start == NULL)
2486 Py_RETURN_NONE;
2487
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002488 state_reset(state);
2489
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002490 state->ptr = state->start;
2491
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002492 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002493 if (PyErr_Occurred())
2494 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002495
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002496 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002497 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002498
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002499 if (status == 0)
2500 state->start = NULL;
2501 else if (state->ptr != state->start)
2502 state->start = state->ptr;
2503 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002504 state->start = (void*) ((char*) state->ptr + state->charsize);
2505 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002506 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002507
2508 return match;
2509}
2510
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002511static PyObject *
2512pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002513{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002514 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002515
2516 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002517 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2518 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002519 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002520 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002521
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002522 /* create search state object */
2523 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2524 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002525 return NULL;
2526 }
2527
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002528 Py_INCREF(self);
2529 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002530
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002531 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002532}
2533
Victor Stinnerb44fb122016-11-21 16:35:08 +01002534static Py_hash_t
2535pattern_hash(PatternObject *self)
2536{
2537 Py_hash_t hash, hash2;
2538
2539 hash = PyObject_Hash(self->pattern);
2540 if (hash == -1) {
2541 return -1;
2542 }
2543
2544 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2545 hash ^= hash2;
2546
2547 hash ^= self->flags;
2548 hash ^= self->isbytes;
2549 hash ^= self->codesize;
2550
2551 if (hash == -1) {
2552 hash = -2;
2553 }
2554 return hash;
2555}
2556
2557static PyObject*
2558pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2559{
2560 PatternObject *left, *right;
2561 int cmp;
2562
2563 if (op != Py_EQ && op != Py_NE) {
2564 Py_RETURN_NOTIMPLEMENTED;
2565 }
2566
2567 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2568 Py_RETURN_NOTIMPLEMENTED;
2569 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002570
2571 if (lefto == righto) {
2572 /* a pattern is equal to itself */
2573 return PyBool_FromLong(op == Py_EQ);
2574 }
2575
Victor Stinnerb44fb122016-11-21 16:35:08 +01002576 left = (PatternObject *)lefto;
2577 right = (PatternObject *)righto;
2578
2579 cmp = (left->flags == right->flags
2580 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002581 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002582 if (cmp) {
2583 /* Compare the code and the pattern because the same pattern can
2584 produce different codes depending on the locale used to compile the
2585 pattern when the re.LOCALE flag is used. Don't compare groups,
2586 indexgroup nor groupindex: they are derivated from the pattern. */
2587 cmp = (memcmp(left->code, right->code,
2588 sizeof(left->code[0]) * left->codesize) == 0);
2589 }
2590 if (cmp) {
2591 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2592 Py_EQ);
2593 if (cmp < 0) {
2594 return NULL;
2595 }
2596 }
2597 if (op == Py_NE) {
2598 cmp = !cmp;
2599 }
2600 return PyBool_FromLong(cmp);
2601}
2602
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002603#include "clinic/_sre.c.h"
2604
2605static PyMethodDef pattern_methods[] = {
2606 _SRE_SRE_PATTERN_MATCH_METHODDEF
2607 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2608 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2609 _SRE_SRE_PATTERN_SUB_METHODDEF
2610 _SRE_SRE_PATTERN_SUBN_METHODDEF
2611 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2612 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2613 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2614 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2615 _SRE_SRE_PATTERN___COPY___METHODDEF
2616 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2617 {NULL, NULL}
2618};
2619
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002620static PyGetSetDef pattern_getset[] = {
2621 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2622 "A dictionary mapping group names to group numbers."},
2623 {NULL} /* Sentinel */
2624};
2625
2626#define PAT_OFF(x) offsetof(PatternObject, x)
2627static PyMemberDef pattern_members[] = {
2628 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2629 {"flags", T_INT, PAT_OFF(flags), READONLY},
2630 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2631 {NULL} /* Sentinel */
2632};
2633
2634static PyTypeObject Pattern_Type = {
2635 PyVarObject_HEAD_INIT(NULL, 0)
2636 "_" SRE_MODULE ".SRE_Pattern",
2637 sizeof(PatternObject), sizeof(SRE_CODE),
2638 (destructor)pattern_dealloc, /* tp_dealloc */
2639 0, /* tp_print */
2640 0, /* tp_getattr */
2641 0, /* tp_setattr */
2642 0, /* tp_reserved */
2643 (reprfunc)pattern_repr, /* tp_repr */
2644 0, /* tp_as_number */
2645 0, /* tp_as_sequence */
2646 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002647 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002648 0, /* tp_call */
2649 0, /* tp_str */
2650 0, /* tp_getattro */
2651 0, /* tp_setattro */
2652 0, /* tp_as_buffer */
2653 Py_TPFLAGS_DEFAULT, /* tp_flags */
2654 pattern_doc, /* tp_doc */
2655 0, /* tp_traverse */
2656 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002657 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002658 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2659 0, /* tp_iter */
2660 0, /* tp_iternext */
2661 pattern_methods, /* tp_methods */
2662 pattern_members, /* tp_members */
2663 pattern_getset, /* tp_getset */
2664};
2665
Eric V. Smith605bdae2016-09-11 08:55:43 -04002666/* Match objects do not support length or assignment, but do support
2667 __getitem__. */
2668static PyMappingMethods match_as_mapping = {
2669 NULL,
2670 (binaryfunc)match_getitem,
2671 NULL
2672};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002673
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002674static PyMethodDef match_methods[] = {
2675 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2676 _SRE_SRE_MATCH_START_METHODDEF
2677 _SRE_SRE_MATCH_END_METHODDEF
2678 _SRE_SRE_MATCH_SPAN_METHODDEF
2679 _SRE_SRE_MATCH_GROUPS_METHODDEF
2680 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2681 _SRE_SRE_MATCH_EXPAND_METHODDEF
2682 _SRE_SRE_MATCH___COPY___METHODDEF
2683 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2684 {NULL, NULL}
2685};
2686
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002687static PyGetSetDef match_getset[] = {
2688 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2689 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2690 {"regs", (getter)match_regs_get, (setter)NULL},
2691 {NULL}
2692};
2693
2694#define MATCH_OFF(x) offsetof(MatchObject, x)
2695static PyMemberDef match_members[] = {
2696 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2697 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2698 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2699 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2700 {NULL}
2701};
2702
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2705
2706static PyTypeObject Match_Type = {
2707 PyVarObject_HEAD_INIT(NULL,0)
2708 "_" SRE_MODULE ".SRE_Match",
2709 sizeof(MatchObject), sizeof(Py_ssize_t),
2710 (destructor)match_dealloc, /* tp_dealloc */
2711 0, /* tp_print */
2712 0, /* tp_getattr */
2713 0, /* tp_setattr */
2714 0, /* tp_reserved */
2715 (reprfunc)match_repr, /* tp_repr */
2716 0, /* tp_as_number */
2717 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002718 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002719 0, /* tp_hash */
2720 0, /* tp_call */
2721 0, /* tp_str */
2722 0, /* tp_getattro */
2723 0, /* tp_setattro */
2724 0, /* tp_as_buffer */
2725 Py_TPFLAGS_DEFAULT, /* tp_flags */
2726 match_doc, /* tp_doc */
2727 0, /* tp_traverse */
2728 0, /* tp_clear */
2729 0, /* tp_richcompare */
2730 0, /* tp_weaklistoffset */
2731 0, /* tp_iter */
2732 0, /* tp_iternext */
2733 match_methods, /* tp_methods */
2734 match_members, /* tp_members */
2735 match_getset, /* tp_getset */
2736};
2737
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002738static PyMethodDef scanner_methods[] = {
2739 _SRE_SRE_SCANNER_MATCH_METHODDEF
2740 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2741 {NULL, NULL}
2742};
2743
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002744#define SCAN_OFF(x) offsetof(ScannerObject, x)
2745static PyMemberDef scanner_members[] = {
2746 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2747 {NULL} /* Sentinel */
2748};
2749
2750static PyTypeObject Scanner_Type = {
2751 PyVarObject_HEAD_INIT(NULL, 0)
2752 "_" SRE_MODULE ".SRE_Scanner",
2753 sizeof(ScannerObject), 0,
2754 (destructor)scanner_dealloc,/* tp_dealloc */
2755 0, /* tp_print */
2756 0, /* tp_getattr */
2757 0, /* tp_setattr */
2758 0, /* tp_reserved */
2759 0, /* tp_repr */
2760 0, /* tp_as_number */
2761 0, /* tp_as_sequence */
2762 0, /* tp_as_mapping */
2763 0, /* tp_hash */
2764 0, /* tp_call */
2765 0, /* tp_str */
2766 0, /* tp_getattro */
2767 0, /* tp_setattro */
2768 0, /* tp_as_buffer */
2769 Py_TPFLAGS_DEFAULT, /* tp_flags */
2770 0, /* tp_doc */
2771 0, /* tp_traverse */
2772 0, /* tp_clear */
2773 0, /* tp_richcompare */
2774 0, /* tp_weaklistoffset */
2775 0, /* tp_iter */
2776 0, /* tp_iternext */
2777 scanner_methods, /* tp_methods */
2778 scanner_members, /* tp_members */
2779 0, /* tp_getset */
2780};
2781
Guido van Rossumb700df92000-03-31 14:59:30 +00002782static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002783 _SRE_COMPILE_METHODDEF
2784 _SRE_GETCODESIZE_METHODDEF
Serhiy Storchaka6d336a02017-05-09 23:37:14 +03002785 _SRE_ASCII_ISCASED_METHODDEF
2786 _SRE_UNICODE_ISCASED_METHODDEF
Serhiy Storchaka7186cc22017-05-05 10:42:46 +03002787 _SRE_ASCII_TOLOWER_METHODDEF
2788 _SRE_UNICODE_TOLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002789 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002790};
2791
Martin v. Löwis1a214512008-06-11 05:26:20 +00002792static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002793 PyModuleDef_HEAD_INIT,
2794 "_" SRE_MODULE,
2795 NULL,
2796 -1,
2797 _functions,
2798 NULL,
2799 NULL,
2800 NULL,
2801 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002802};
2803
2804PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002805{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002806 PyObject* m;
2807 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002808 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002809
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002810 /* Patch object types */
2811 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2812 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002813 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002814
Martin v. Löwis1a214512008-06-11 05:26:20 +00002815 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002816 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002817 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002818 d = PyModule_GetDict(m);
2819
Christian Heimes217cfd12007-12-02 14:31:20 +00002820 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002821 if (x) {
2822 PyDict_SetItemString(d, "MAGIC", x);
2823 Py_DECREF(x);
2824 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002825
Christian Heimes217cfd12007-12-02 14:31:20 +00002826 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002827 if (x) {
2828 PyDict_SetItemString(d, "CODESIZE", x);
2829 Py_DECREF(x);
2830 }
2831
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002832 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2833 if (x) {
2834 PyDict_SetItemString(d, "MAXREPEAT", x);
2835 Py_DECREF(x);
2836 }
2837
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002838 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2839 if (x) {
2840 PyDict_SetItemString(d, "MAXGROUPS", x);
2841 Py_DECREF(x);
2842 }
2843
Neal Norwitzfe537132007-08-26 03:55:15 +00002844 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002845 if (x) {
2846 PyDict_SetItemString(d, "copyright", x);
2847 Py_DECREF(x);
2848 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002849 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002850}
2851
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002852/* vim:ts=4:sw=4:et
2853*/