/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */

/* Engine version and copyright banner, kept as a file-local constant. */
static const char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";

Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000065/* enables copy/deepcopy handling (work in progress) */
66#undef USE_BUILTIN_COPY
67
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000068/* -------------------------------------------------------------------- */
69
/* LOCAL(type) declares a file-local helper returning `type`, using the
 * cheapest calling convention / inlining hint the compiler offers. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes (parenthesized so they are safe inside larger expressions) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */
#define SRE_ERROR_INTERRUPTED (-10) /* signal handler raised exception */

/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and expands
 * to nothing otherwise; note the double parentheses at the call site. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif

/* -------------------------------------------------------------------- */
/* search engine state */

/* ASCII-only character predicates: anything >= 128 never matches.
 * Each macro evaluates `ch` more than once, so pass side-effect-free
 * expressions only. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch)\
    ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))

/* ASCII lower-casing: code points below 128 go through Py_TOLOWER,
 * everything else is returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
}

/* ASCII upper-casing: code points below 128 go through Py_TOUPPER,
 * everything else is returned unchanged. */
static unsigned int sre_upper(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOUPPER(ch) : ch);
}

/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* Values outside 0..255 never match; in-range values are classified by
 * the C library's isalnum(), i.e. according to the current locale. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Locale-aware lower-casing: values below 256 go through the C library's
 * tolower(); larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}

/* Locale-aware upper-casing: values below 256 go through the C library's
 * toupper(); larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
}

/* unicode-specific character predicates */

/* Thin aliases for the Py_UNICODE_* classification macros; "word" means
 * alphanumeric or underscore. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')

/* Unicode lower-casing via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER(ch);
}

/* Unicode upper-casing via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOUPPER(ch);
}

Guido van Rossumb700df92000-03-31 14:59:30 +0000152LOCAL(int)
153sre_category(SRE_CODE category, unsigned int ch)
154{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000155 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000157 case SRE_CATEGORY_DIGIT:
158 return SRE_IS_DIGIT(ch);
159 case SRE_CATEGORY_NOT_DIGIT:
160 return !SRE_IS_DIGIT(ch);
161 case SRE_CATEGORY_SPACE:
162 return SRE_IS_SPACE(ch);
163 case SRE_CATEGORY_NOT_SPACE:
164 return !SRE_IS_SPACE(ch);
165 case SRE_CATEGORY_WORD:
166 return SRE_IS_WORD(ch);
167 case SRE_CATEGORY_NOT_WORD:
168 return !SRE_IS_WORD(ch);
169 case SRE_CATEGORY_LINEBREAK:
170 return SRE_IS_LINEBREAK(ch);
171 case SRE_CATEGORY_NOT_LINEBREAK:
172 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000174 case SRE_CATEGORY_LOC_WORD:
175 return SRE_LOC_IS_WORD(ch);
176 case SRE_CATEGORY_LOC_NOT_WORD:
177 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000178
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000179 case SRE_CATEGORY_UNI_DIGIT:
180 return SRE_UNI_IS_DIGIT(ch);
181 case SRE_CATEGORY_UNI_NOT_DIGIT:
182 return !SRE_UNI_IS_DIGIT(ch);
183 case SRE_CATEGORY_UNI_SPACE:
184 return SRE_UNI_IS_SPACE(ch);
185 case SRE_CATEGORY_UNI_NOT_SPACE:
186 return !SRE_UNI_IS_SPACE(ch);
187 case SRE_CATEGORY_UNI_WORD:
188 return SRE_UNI_IS_WORD(ch);
189 case SRE_CATEGORY_UNI_NOT_WORD:
190 return !SRE_UNI_IS_WORD(ch);
191 case SRE_CATEGORY_UNI_LINEBREAK:
192 return SRE_UNI_IS_LINEBREAK(ch);
193 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
194 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 }
196 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000197}
198
199/* helpers */
200
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000201static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000202data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000203{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000204 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000205 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000206 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000207 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000208 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000209}
210
211static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000212data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000213{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000214 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000215 minsize = state->data_stack_base+size;
216 cursize = state->data_stack_size;
217 if (cursize < minsize) {
218 void* stack;
219 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300220 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000222 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000223 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000224 return SRE_ERROR_MEMORY;
225 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000226 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000227 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000228 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000229 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000230}
231
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000232/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000233
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300234#define SRE_CHAR Py_UCS1
235#define SIZEOF_SRE_CHAR 1
236#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300237#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000238
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300239/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000240
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300241#define SRE_CHAR Py_UCS2
242#define SIZEOF_SRE_CHAR 2
243#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300244#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000245
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300246/* generate 32-bit unicode version */
247
248#define SRE_CHAR Py_UCS4
249#define SIZEOF_SRE_CHAR 4
250#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300251#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000252
253/* -------------------------------------------------------------------- */
254/* factories and destructors */
255
256/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100257static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300258static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000259
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300260
261/*[clinic input]
262module _sre
263class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
264class _sre.SRE_Match "MatchObject *" "&Match_Type"
265class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
266[clinic start generated code]*/
267/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
268
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700269static PyTypeObject Pattern_Type;
270static PyTypeObject Match_Type;
271static PyTypeObject Scanner_Type;
272
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300273/*[clinic input]
274_sre.getcodesize -> int
275[clinic start generated code]*/
276
277static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300278_sre_getcodesize_impl(PyObject *module)
279/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000280{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300281 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000282}
283
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300284/*[clinic input]
285_sre.getlower -> int
286
287 character: int
288 flags: int
289 /
290
291[clinic start generated code]*/
292
293static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300294_sre_getlower_impl(PyObject *module, int character, int flags)
295/*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000296{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000297 if (flags & SRE_FLAG_LOCALE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300298 return sre_lower_locale(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000299 if (flags & SRE_FLAG_UNICODE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300300 return sre_lower_unicode(character);
301 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000302}
303
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000304LOCAL(void)
305state_reset(SRE_STATE* state)
306{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000308 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000309
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000310 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000311 state->lastindex = -1;
312
313 state->repeat = NULL;
314
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000315 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000316}
317
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000318static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300320 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600321 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000322{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000323 /* given a python object, return a data pointer, a length (in
324 characters), and a character size. return NULL if the object
325 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000326
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000327 /* Unicode objects do not support the buffer API. So, get the data
328 directly instead. */
329 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200330 if (PyUnicode_READY(string) == -1)
331 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200332 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200333 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300334 *p_isbytes = 0;
335 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000336 }
337
Victor Stinner0058b862011-09-29 03:27:47 +0200338 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300339 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200340 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300341 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000342 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 *p_length = view->len;
345 *p_charsize = 1;
346 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000347
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300348 if (view->buf == NULL) {
349 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
350 PyBuffer_Release(view);
351 view->buf = NULL;
352 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300354 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000355}
356
357LOCAL(PyObject*)
358state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000359 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000360{
361 /* prepare state object */
362
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000363 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300364 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000365 void* ptr;
366
367 memset(state, 0, sizeof(SRE_STATE));
368
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300369 state->mark = PyMem_New(void *, pattern->groups * 2);
370 if (!state->mark) {
371 PyErr_NoMemory();
372 goto err;
373 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000374 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000375 state->lastindex = -1;
376
Benjamin Petersone48944b2012-03-07 14:50:25 -0600377 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300378 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000379 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600380 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000381
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600383 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200384 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600385 goto err;
386 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300387 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600388 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200389 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600390 goto err;
391 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 /* adjust boundaries */
394 if (start < 0)
395 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000396 else if (start > length)
397 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 if (end < 0)
400 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000401 else if (end > length)
402 end = length;
403
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300404 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000405 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 state->start = (void*) ((char*) ptr + start * state->charsize);
410 state->end = (void*) ((char*) ptr + end * state->charsize);
411
412 Py_INCREF(string);
413 state->string = string;
414 state->pos = start;
415 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000416
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200417 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000418 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200419 state->upper = sre_upper_locale;
420 }
421 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000422 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200423 state->upper = sre_upper_unicode;
424 }
425 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000426 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200427 state->upper = sre_upper;
428 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600431 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300432 PyMem_Del(state->mark);
433 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600434 if (state->buffer.buf)
435 PyBuffer_Release(&state->buffer);
436 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000437}
438
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000439LOCAL(void)
440state_fini(SRE_STATE* state)
441{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600442 if (state->buffer.buf)
443 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000445 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300446 PyMem_Del(state->mark);
447 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000448}
449
/* calculate offset from start of string */
/* (byte distance from state->beginning, divided by the character size,
 * i.e. an offset in characters) */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)

Fredrik Lundh75f2d672000-06-29 11:34:28 +0000454LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300455getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300456 PyObject* string, Py_ssize_t start, Py_ssize_t end)
457{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300458 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300459 if (PyBytes_CheckExact(string) &&
460 start == 0 && end == PyBytes_GET_SIZE(string)) {
461 Py_INCREF(string);
462 return string;
463 }
464 return PyBytes_FromStringAndSize(
465 (const char *)ptr + start, end - start);
466 }
467 else {
468 return PyUnicode_Substring(string, start, end);
469 }
470}
471
472LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000473state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000474{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000475 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000476
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000477 index = (index - 1) * 2;
478
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000479 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000480 if (empty)
481 /* want empty string */
482 i = j = 0;
483 else {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200484 Py_RETURN_NONE;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000485 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000486 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000487 i = STATE_OFFSET(state, state->mark[index]);
488 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000489 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000490
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300491 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000492}
493
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000494static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100495pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000496{
497 switch (status) {
498 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400499 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000500 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400501 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000502 "maximum recursion limit exceeded"
503 );
504 break;
505 case SRE_ERROR_MEMORY:
506 PyErr_NoMemory();
507 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000508 case SRE_ERROR_INTERRUPTED:
509 /* An exception has already been raised, so let it fly */
510 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000511 default:
512 /* other error codes indicate compiler/engine bugs */
513 PyErr_SetString(
514 PyExc_RuntimeError,
515 "internal error in regular expression engine"
516 );
517 }
518}
519
Guido van Rossumb700df92000-03-31 14:59:30 +0000520static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000521pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000522{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000523 if (self->weakreflist != NULL)
524 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000525 Py_XDECREF(self->pattern);
526 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000527 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000528 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000529}
530
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300531LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300532sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300533{
534 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300535 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300536 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300537 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300538 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300539 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300540}
541
542LOCAL(Py_ssize_t)
543sre_search(SRE_STATE* state, SRE_CODE* pattern)
544{
545 if (state->charsize == 1)
546 return sre_ucs1_search(state, pattern);
547 if (state->charsize == 2)
548 return sre_ucs2_search(state, pattern);
549 assert(state->charsize == 4);
550 return sre_ucs4_search(state, pattern);
551}
552
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300553/*[clinic input]
554_sre.SRE_Pattern.match
555
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200556 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300557 pos: Py_ssize_t = 0
558 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300559
560Matches zero or more characters at the beginning of the string.
561[clinic start generated code]*/
562
Larry Hastings16c51912014-01-07 11:53:01 -0800563static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300564_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200565 Py_ssize_t pos, Py_ssize_t endpos)
566/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800567{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000568 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100569 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300570 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000571
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300572 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000573 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000574
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 state.ptr = state.start;
576
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000577 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
578
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300579 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000580
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000581 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300582 if (PyErr_Occurred()) {
583 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000584 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300585 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000586
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300587 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000588 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300589 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000590}
591
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300592/*[clinic input]
593_sre.SRE_Pattern.fullmatch
594
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200595 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300596 pos: Py_ssize_t = 0
597 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300598
599Matches against all of the string
600[clinic start generated code]*/
601
602static PyObject *
603_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200604 Py_ssize_t pos, Py_ssize_t endpos)
605/*[clinic end generated code: output=5833c47782a35f4a input=a6f640614aaefceb]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200606{
607 SRE_STATE state;
608 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300609 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200610
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300611 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200612 return NULL;
613
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200614 state.ptr = state.start;
615
616 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
617
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300618 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200619
620 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300621 if (PyErr_Occurred()) {
622 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200623 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300624 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200625
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300626 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200627 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300628 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200629}
630
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300631/*[clinic input]
632_sre.SRE_Pattern.search
633
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200634 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300635 pos: Py_ssize_t = 0
636 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300637
638Scan through string looking for a match, and return a corresponding match object instance.
639
640Return None if no position in the string matches.
641[clinic start generated code]*/
642
643static PyObject *
644_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200645 Py_ssize_t pos, Py_ssize_t endpos)
646/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000647{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000648 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100649 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300650 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000651
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300652 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000653 return NULL;
654
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000655 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
656
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300657 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000658
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000659 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
660
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300661 if (PyErr_Occurred()) {
662 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000663 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300664 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000665
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300666 match = pattern_new_match(self, &state, status);
667 state_fini(&state);
668 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000669}
670
671static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200672call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000673{
674 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000675 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000676 PyObject* func;
677 PyObject* result;
678
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000679 if (!args)
680 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000681 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000682 if (!name)
683 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000684 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000685 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000686 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000687 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000688 func = PyObject_GetAttrString(mod, function);
689 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000690 if (!func)
691 return NULL;
692 result = PyObject_CallObject(func, args);
693 Py_DECREF(func);
694 Py_DECREF(args);
695 return result;
696}
697
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
 *
 * Returns 1 on success and 0 on failure (exception set by call()); on
 * failure *object is left untouched.
 */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* call() copes with a NULL tuple by returning NULL, so the pack
       result does not need a separate check here */
    PyObject* args = PyTuple_Pack(2, *object, memo);
    PyObject* duplicate = call("copy", "deepcopy", args);

    if (duplicate == NULL)
        return 0;

    /* swap in the copy and release the previous value */
    Py_SETREF(*object, duplicate);
    return 1;
}
#endif
716
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300717/*[clinic input]
718_sre.SRE_Pattern.findall
719
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200720 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300721 pos: Py_ssize_t = 0
722 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300723
724Return a list of all non-overlapping matches of pattern in string.
725[clinic start generated code]*/
726
727static PyObject *
728_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200729 Py_ssize_t pos, Py_ssize_t endpos)
730/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000731{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000732 SRE_STATE state;
733 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100734 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000735 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000736
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300737 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000738 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000739
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000740 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000741 if (!list) {
742 state_fini(&state);
743 return NULL;
744 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000745
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000747
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000748 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000749
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000750 state_reset(&state);
751
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000752 state.ptr = state.start;
753
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300754 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300755 if (PyErr_Occurred())
756 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000757
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000758 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000759 if (status == 0)
760 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000761 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000762 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000763 }
Tim Peters3d563502006-01-21 02:47:53 +0000764
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000765 /* don't bother to build a match object */
766 switch (self->groups) {
767 case 0:
768 b = STATE_OFFSET(&state, state.start);
769 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300770 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300771 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000772 if (!item)
773 goto error;
774 break;
775 case 1:
776 item = state_getslice(&state, 1, string, 1);
777 if (!item)
778 goto error;
779 break;
780 default:
781 item = PyTuple_New(self->groups);
782 if (!item)
783 goto error;
784 for (i = 0; i < self->groups; i++) {
785 PyObject* o = state_getslice(&state, i+1, string, 1);
786 if (!o) {
787 Py_DECREF(item);
788 goto error;
789 }
790 PyTuple_SET_ITEM(item, i, o);
791 }
792 break;
793 }
794
795 status = PyList_Append(list, item);
796 Py_DECREF(item);
797 if (status < 0)
798 goto error;
799
800 if (state.ptr == state.start)
801 state.start = (void*) ((char*) state.ptr + state.charsize);
802 else
803 state.start = state.ptr;
804
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000805 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000806
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000807 state_fini(&state);
808 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000809
810error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000811 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000812 state_fini(&state);
813 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000814
Guido van Rossumb700df92000-03-31 14:59:30 +0000815}
816
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300817/*[clinic input]
818_sre.SRE_Pattern.finditer
819
820 string: object
821 pos: Py_ssize_t = 0
822 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
823
824Return an iterator over all non-overlapping matches for the RE pattern in string.
825
826For each match, the iterator returns a match object.
827[clinic start generated code]*/
828
829static PyObject *
830_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
831 Py_ssize_t pos, Py_ssize_t endpos)
832/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000833{
834 PyObject* scanner;
835 PyObject* search;
836 PyObject* iterator;
837
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300838 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000839 if (!scanner)
840 return NULL;
841
842 search = PyObject_GetAttrString(scanner, "search");
843 Py_DECREF(scanner);
844 if (!search)
845 return NULL;
846
847 iterator = PyCallIter_New(search, Py_None);
848 Py_DECREF(search);
849
850 return iterator;
851}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000852
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300853/*[clinic input]
854_sre.SRE_Pattern.scanner
855
856 string: object
857 pos: Py_ssize_t = 0
858 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
859
860[clinic start generated code]*/
861
862static PyObject *
863_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
864 Py_ssize_t pos, Py_ssize_t endpos)
865/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
866{
867 return pattern_scanner(self, string, pos, endpos);
868}
869
870/*[clinic input]
871_sre.SRE_Pattern.split
872
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200873 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300874 maxsplit: Py_ssize_t = 0
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300875
876Split string by the occurrences of pattern.
877[clinic start generated code]*/
878
879static PyObject *
880_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200881 Py_ssize_t maxsplit)
882/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000883{
884 SRE_STATE state;
885 PyObject* list;
886 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100887 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000888 Py_ssize_t n;
889 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000890 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000891
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200892 assert(self->codesize != 0);
893 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
894 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
895 PyErr_SetString(PyExc_ValueError,
896 "split() requires a non-empty pattern match.");
897 return NULL;
898 }
899 if (PyErr_WarnEx(PyExc_FutureWarning,
900 "split() requires a non-empty pattern match.",
901 1) < 0)
902 return NULL;
903 }
904
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300905 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000906 return NULL;
907
908 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000909 if (!list) {
910 state_fini(&state);
911 return NULL;
912 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000913
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000914 n = 0;
915 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000916
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000917 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000918
919 state_reset(&state);
920
921 state.ptr = state.start;
922
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300923 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300924 if (PyErr_Occurred())
925 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000926
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000927 if (status <= 0) {
928 if (status == 0)
929 break;
930 pattern_error(status);
931 goto error;
932 }
Tim Peters3d563502006-01-21 02:47:53 +0000933
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000934 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300935 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000936 break;
937 /* skip one character */
938 state.start = (void*) ((char*) state.ptr + state.charsize);
939 continue;
940 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000941
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000942 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300943 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000944 string, STATE_OFFSET(&state, last),
945 STATE_OFFSET(&state, state.start)
946 );
947 if (!item)
948 goto error;
949 status = PyList_Append(list, item);
950 Py_DECREF(item);
951 if (status < 0)
952 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000953
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000954 /* add groups (if any) */
955 for (i = 0; i < self->groups; i++) {
956 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000957 if (!item)
958 goto error;
959 status = PyList_Append(list, item);
960 Py_DECREF(item);
961 if (status < 0)
962 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000963 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000964
965 n = n + 1;
966
967 last = state.start = state.ptr;
968
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000969 }
970
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000971 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300972 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000973 string, STATE_OFFSET(&state, last), state.endpos
974 );
975 if (!item)
976 goto error;
977 status = PyList_Append(list, item);
978 Py_DECREF(item);
979 if (status < 0)
980 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000981
982 state_fini(&state);
983 return list;
984
985error:
986 Py_DECREF(list);
987 state_fini(&state);
988 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000989
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000990}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000991
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000992static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000993pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000994 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000995{
996 SRE_STATE state;
997 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300998 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000999 PyObject* item;
1000 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001001 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001002 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001003 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001004 Py_ssize_t n;
1005 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001006 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001007 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001008 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001009
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001010 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001011 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001012 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001013 Py_INCREF(filter);
1014 filter_is_callable = 1;
1015 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001016 /* if not callable, check if it's a literal string */
1017 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001018 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001019 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001020 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001021 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001022 if (charsize == 1)
1023 literal = memchr(ptr, '\\', n) == NULL;
1024 else
1025 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001026 } else {
1027 PyErr_Clear();
1028 literal = 0;
1029 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001030 if (view.buf)
1031 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001032 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001033 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001034 Py_INCREF(filter);
1035 filter_is_callable = 0;
1036 } else {
1037 /* not a literal; hand it over to the template compiler */
1038 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001039 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001040 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001041 );
1042 if (!filter)
1043 return NULL;
1044 filter_is_callable = PyCallable_Check(filter);
1045 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001046 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001047
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001048 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001049 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001050 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001051 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001052
1053 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001054 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001055 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001056 state_fini(&state);
1057 return NULL;
1058 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001059
1060 n = i = 0;
1061
1062 while (!count || n < count) {
1063
1064 state_reset(&state);
1065
1066 state.ptr = state.start;
1067
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001068 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001069 if (PyErr_Occurred())
1070 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001071
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001072 if (status <= 0) {
1073 if (status == 0)
1074 break;
1075 pattern_error(status);
1076 goto error;
1077 }
Tim Peters3d563502006-01-21 02:47:53 +00001078
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001079 b = STATE_OFFSET(&state, state.start);
1080 e = STATE_OFFSET(&state, state.ptr);
1081
1082 if (i < b) {
1083 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001084 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001085 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001086 if (!item)
1087 goto error;
1088 status = PyList_Append(list, item);
1089 Py_DECREF(item);
1090 if (status < 0)
1091 goto error;
1092
1093 } else if (i == b && i == e && n > 0)
1094 /* ignore empty match on latest position */
1095 goto next;
1096
1097 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001098 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001099 match = pattern_new_match(self, &state, 1);
1100 if (!match)
1101 goto error;
Victor Stinner7bfb42d2016-12-05 17:04:32 +01001102 item = PyObject_CallFunctionObjArgs(filter, match, NULL);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001103 Py_DECREF(match);
1104 if (!item)
1105 goto error;
1106 } else {
1107 /* filter is literal string */
1108 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001109 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001110 }
1111
1112 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001113 if (item != Py_None) {
1114 status = PyList_Append(list, item);
1115 Py_DECREF(item);
1116 if (status < 0)
1117 goto error;
1118 }
Tim Peters3d563502006-01-21 02:47:53 +00001119
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001120 i = e;
1121 n = n + 1;
1122
1123next:
1124 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001125 if (state.ptr == state.end)
1126 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001127 if (state.ptr == state.start)
1128 state.start = (void*) ((char*) state.ptr + state.charsize);
1129 else
1130 state.start = state.ptr;
1131
1132 }
1133
1134 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001135 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001136 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001137 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001138 if (!item)
1139 goto error;
1140 status = PyList_Append(list, item);
1141 Py_DECREF(item);
1142 if (status < 0)
1143 goto error;
1144 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001145
1146 state_fini(&state);
1147
Guido van Rossum4e173842001-12-07 04:25:10 +00001148 Py_DECREF(filter);
1149
Fredrik Lundhdac58492001-10-21 21:48:30 +00001150 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001151 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001152 if (!joiner) {
1153 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001154 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001155 }
1156 if (PyList_GET_SIZE(list) == 0) {
1157 Py_DECREF(list);
1158 item = joiner;
1159 }
1160 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001161 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001162 item = _PyBytes_Join(joiner, list);
1163 else
1164 item = PyUnicode_Join(joiner, list);
1165 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001166 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001167 if (!item)
1168 return NULL;
1169 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001170
1171 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001172 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001173
1174 return item;
1175
1176error:
1177 Py_DECREF(list);
1178 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001179 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001180 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001181
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001182}
1183
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001184/*[clinic input]
1185_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001186
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001187 repl: object
1188 string: object
1189 count: Py_ssize_t = 0
1190
1191Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1192[clinic start generated code]*/
1193
1194static PyObject *
1195_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1196 PyObject *string, Py_ssize_t count)
1197/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1198{
1199 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001200}
1201
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001202/*[clinic input]
1203_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001204
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001205 repl: object
1206 string: object
1207 count: Py_ssize_t = 0
1208
1209Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1210[clinic start generated code]*/
1211
1212static PyObject *
1213_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1214 PyObject *string, Py_ssize_t count)
1215/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1216{
1217 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001218}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001219
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001220/*[clinic input]
1221_sre.SRE_Pattern.__copy__
1222
1223[clinic start generated code]*/
1224
1225static PyObject *
1226_sre_SRE_Pattern___copy___impl(PatternObject *self)
1227/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001228{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001229#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001230 PatternObject* copy;
1231 int offset;
1232
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001233 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1234 if (!copy)
1235 return NULL;
1236
1237 offset = offsetof(PatternObject, groups);
1238
1239 Py_XINCREF(self->groupindex);
1240 Py_XINCREF(self->indexgroup);
1241 Py_XINCREF(self->pattern);
1242
1243 memcpy((char*) copy + offset, (char*) self + offset,
1244 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001245 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001246
1247 return (PyObject*) copy;
1248#else
1249 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1250 return NULL;
1251#endif
1252}
1253
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001254/*[clinic input]
1255_sre.SRE_Pattern.__deepcopy__
1256
1257 memo: object
1258
1259[clinic start generated code]*/
1260
1261static PyObject *
1262_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1263/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001264{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001265#ifdef USE_BUILTIN_COPY
1266 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001267
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001268 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001269 if (!copy)
1270 return NULL;
1271
1272 if (!deepcopy(&copy->groupindex, memo) ||
1273 !deepcopy(&copy->indexgroup, memo) ||
1274 !deepcopy(&copy->pattern, memo)) {
1275 Py_DECREF(copy);
1276 return NULL;
1277 }
1278
1279#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001280 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1281 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001282#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001283}
1284
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001285static PyObject *
1286pattern_repr(PatternObject *obj)
1287{
1288 static const struct {
1289 const char *name;
1290 int value;
1291 } flag_names[] = {
1292 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1293 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1294 {"re.LOCALE", SRE_FLAG_LOCALE},
1295 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1296 {"re.DOTALL", SRE_FLAG_DOTALL},
1297 {"re.UNICODE", SRE_FLAG_UNICODE},
1298 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1299 {"re.DEBUG", SRE_FLAG_DEBUG},
1300 {"re.ASCII", SRE_FLAG_ASCII},
1301 };
1302 PyObject *result = NULL;
1303 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001304 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001305 int flags = obj->flags;
1306
1307 /* Omit re.UNICODE for valid string patterns. */
1308 if (obj->isbytes == 0 &&
1309 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1310 SRE_FLAG_UNICODE)
1311 flags &= ~SRE_FLAG_UNICODE;
1312
1313 flag_items = PyList_New(0);
1314 if (!flag_items)
1315 return NULL;
1316
1317 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1318 if (flags & flag_names[i].value) {
1319 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1320 if (!item)
1321 goto done;
1322
1323 if (PyList_Append(flag_items, item) < 0) {
1324 Py_DECREF(item);
1325 goto done;
1326 }
1327 Py_DECREF(item);
1328 flags &= ~flag_names[i].value;
1329 }
1330 }
1331 if (flags) {
1332 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1333 if (!item)
1334 goto done;
1335
1336 if (PyList_Append(flag_items, item) < 0) {
1337 Py_DECREF(item);
1338 goto done;
1339 }
1340 Py_DECREF(item);
1341 }
1342
1343 if (PyList_Size(flag_items) > 0) {
1344 PyObject *flags_result;
1345 PyObject *sep = PyUnicode_FromString("|");
1346 if (!sep)
1347 goto done;
1348 flags_result = PyUnicode_Join(sep, flag_items);
1349 Py_DECREF(sep);
1350 if (!flags_result)
1351 goto done;
1352 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1353 obj->pattern, flags_result);
1354 Py_DECREF(flags_result);
1355 }
1356 else {
1357 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1358 }
1359
1360done:
1361 Py_DECREF(flag_items);
1362 return result;
1363}
1364
Raymond Hettinger94478742004-09-24 04:31:19 +00001365PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1366
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001367/* PatternObject's 'groupindex' method. */
1368static PyObject *
1369pattern_groupindex(PatternObject *self)
1370{
1371 return PyDictProxy_New(self->groupindex);
1372}
1373
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001374static int _validate(PatternObject *self); /* Forward */
1375
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001376/*[clinic input]
1377_sre.compile
1378
1379 pattern: object
1380 flags: int
1381 code: object(subclass_of='&PyList_Type')
1382 groups: Py_ssize_t
Victor Stinner726a57d2016-11-22 23:04:39 +01001383 groupindex: object(subclass_of='&PyDict_Type')
1384 indexgroup: object(subclass_of='&PyTuple_Type')
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001385
1386[clinic start generated code]*/
1387
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001388static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001389_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001390 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1391 PyObject *indexgroup)
Victor Stinner726a57d2016-11-22 23:04:39 +01001392/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001393{
1394 /* "compile" pattern descriptor to pattern object */
1395
1396 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001397 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001398
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001399 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001400 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001401 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1402 if (!self)
1403 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001404 self->weakreflist = NULL;
1405 self->pattern = NULL;
1406 self->groupindex = NULL;
1407 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001408
1409 self->codesize = n;
1410
1411 for (i = 0; i < n; i++) {
1412 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001413 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001414 self->code[i] = (SRE_CODE) value;
1415 if ((unsigned long) self->code[i] != value) {
1416 PyErr_SetString(PyExc_OverflowError,
1417 "regular expression code size limit exceeded");
1418 break;
1419 }
1420 }
1421
1422 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001423 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001424 return NULL;
1425 }
1426
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001427 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001428 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001429 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001430 else {
1431 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001432 int charsize;
1433 Py_buffer view;
1434 view.buf = NULL;
1435 if (!getstring(pattern, &p_length, &self->isbytes,
1436 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001437 Py_DECREF(self);
1438 return NULL;
1439 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001440 if (view.buf)
1441 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001442 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001443
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001444 Py_INCREF(pattern);
1445 self->pattern = pattern;
1446
1447 self->flags = flags;
1448
1449 self->groups = groups;
1450
Victor Stinnerb44fb122016-11-21 16:35:08 +01001451 Py_INCREF(groupindex);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001452 self->groupindex = groupindex;
1453
Victor Stinnerb44fb122016-11-21 16:35:08 +01001454 Py_INCREF(indexgroup);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001455 self->indexgroup = indexgroup;
1456
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001457 if (!_validate(self)) {
1458 Py_DECREF(self);
1459 return NULL;
1460 }
1461
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001462 return (PyObject*) self;
1463}
1464
Guido van Rossumb700df92000-03-31 14:59:30 +00001465/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001466/* Code validation */
1467
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1489
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf() argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the enclosing function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the expansion site: they read and
   advance `code`, compare it against `end`, and store into `op`, `arg` or
   `skip` respectively.  They FAIL when `code` has reached `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Like GET_ARG, but for a skip count: additionally FAILs when
   skip-(adj) is larger than the number of code words left before `end`
   (counted from the skip word itself). */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, (void *)(code+skip))); \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001530
1531static int
1532_validate_charset(SRE_CODE *code, SRE_CODE *end)
1533{
1534 /* Some variables are manipulated by the macros above */
1535 SRE_CODE op;
1536 SRE_CODE arg;
1537 SRE_CODE offset;
1538 int i;
1539
1540 while (code < end) {
1541 GET_OP;
1542 switch (op) {
1543
1544 case SRE_OP_NEGATE:
1545 break;
1546
1547 case SRE_OP_LITERAL:
1548 GET_ARG;
1549 break;
1550
1551 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001552 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001553 GET_ARG;
1554 GET_ARG;
1555 break;
1556
1557 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001558 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001559 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001560 FAIL;
1561 code += offset;
1562 break;
1563
1564 case SRE_OP_BIGCHARSET:
1565 GET_ARG; /* Number of blocks */
1566 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001567 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001568 FAIL;
1569 /* Make sure that each byte points to a valid block */
1570 for (i = 0; i < 256; i++) {
1571 if (((unsigned char *)code)[i] >= arg)
1572 FAIL;
1573 }
1574 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001575 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001576 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001577 FAIL;
1578 code += offset;
1579 break;
1580
1581 case SRE_OP_CATEGORY:
1582 GET_ARG;
1583 switch (arg) {
1584 case SRE_CATEGORY_DIGIT:
1585 case SRE_CATEGORY_NOT_DIGIT:
1586 case SRE_CATEGORY_SPACE:
1587 case SRE_CATEGORY_NOT_SPACE:
1588 case SRE_CATEGORY_WORD:
1589 case SRE_CATEGORY_NOT_WORD:
1590 case SRE_CATEGORY_LINEBREAK:
1591 case SRE_CATEGORY_NOT_LINEBREAK:
1592 case SRE_CATEGORY_LOC_WORD:
1593 case SRE_CATEGORY_LOC_NOT_WORD:
1594 case SRE_CATEGORY_UNI_DIGIT:
1595 case SRE_CATEGORY_UNI_NOT_DIGIT:
1596 case SRE_CATEGORY_UNI_SPACE:
1597 case SRE_CATEGORY_UNI_NOT_SPACE:
1598 case SRE_CATEGORY_UNI_WORD:
1599 case SRE_CATEGORY_UNI_NOT_WORD:
1600 case SRE_CATEGORY_UNI_LINEBREAK:
1601 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1602 break;
1603 default:
1604 FAIL;
1605 }
1606 break;
1607
1608 default:
1609 FAIL;
1610
1611 }
1612 }
1613
1614 return 1;
1615}
1616
1617static int
1618_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1619{
1620 /* Some variables are manipulated by the macros above */
1621 SRE_CODE op;
1622 SRE_CODE arg;
1623 SRE_CODE skip;
1624
1625 VTRACE(("code=%p, end=%p\n", code, end));
1626
1627 if (code > end)
1628 FAIL;
1629
1630 while (code < end) {
1631 GET_OP;
1632 switch (op) {
1633
1634 case SRE_OP_MARK:
1635 /* We don't check whether marks are properly nested; the
1636 sre_match() code is robust even if they don't, and the worst
1637 you can get is nonsensical match results. */
1638 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001639 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001640 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1641 FAIL;
1642 }
1643 break;
1644
1645 case SRE_OP_LITERAL:
1646 case SRE_OP_NOT_LITERAL:
1647 case SRE_OP_LITERAL_IGNORE:
1648 case SRE_OP_NOT_LITERAL_IGNORE:
1649 GET_ARG;
1650 /* The arg is just a character, nothing to check */
1651 break;
1652
1653 case SRE_OP_SUCCESS:
1654 case SRE_OP_FAILURE:
1655 /* Nothing to check; these normally end the matching process */
1656 break;
1657
1658 case SRE_OP_AT:
1659 GET_ARG;
1660 switch (arg) {
1661 case SRE_AT_BEGINNING:
1662 case SRE_AT_BEGINNING_STRING:
1663 case SRE_AT_BEGINNING_LINE:
1664 case SRE_AT_END:
1665 case SRE_AT_END_LINE:
1666 case SRE_AT_END_STRING:
1667 case SRE_AT_BOUNDARY:
1668 case SRE_AT_NON_BOUNDARY:
1669 case SRE_AT_LOC_BOUNDARY:
1670 case SRE_AT_LOC_NON_BOUNDARY:
1671 case SRE_AT_UNI_BOUNDARY:
1672 case SRE_AT_UNI_NON_BOUNDARY:
1673 break;
1674 default:
1675 FAIL;
1676 }
1677 break;
1678
1679 case SRE_OP_ANY:
1680 case SRE_OP_ANY_ALL:
1681 /* These have no operands */
1682 break;
1683
1684 case SRE_OP_IN:
1685 case SRE_OP_IN_IGNORE:
1686 GET_SKIP;
1687 /* Stop 1 before the end; we check the FAILURE below */
1688 if (!_validate_charset(code, code+skip-2))
1689 FAIL;
1690 if (code[skip-2] != SRE_OP_FAILURE)
1691 FAIL;
1692 code += skip-1;
1693 break;
1694
1695 case SRE_OP_INFO:
1696 {
1697 /* A minimal info field is
1698 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1699 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1700 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001701 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001702 SRE_CODE *newcode;
1703 GET_SKIP;
1704 newcode = code+skip-1;
1705 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001706 GET_ARG;
1707 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001708 /* Check that only valid flags are present */
1709 if ((flags & ~(SRE_INFO_PREFIX |
1710 SRE_INFO_LITERAL |
1711 SRE_INFO_CHARSET)) != 0)
1712 FAIL;
1713 /* PREFIX and CHARSET are mutually exclusive */
1714 if ((flags & SRE_INFO_PREFIX) &&
1715 (flags & SRE_INFO_CHARSET))
1716 FAIL;
1717 /* LITERAL implies PREFIX */
1718 if ((flags & SRE_INFO_LITERAL) &&
1719 !(flags & SRE_INFO_PREFIX))
1720 FAIL;
1721 /* Validate the prefix */
1722 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001723 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001724 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001725 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001726 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001727 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001728 FAIL;
1729 code += prefix_len;
1730 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001731 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001732 FAIL;
1733 /* Each overlap value should be < prefix_len */
1734 for (i = 0; i < prefix_len; i++) {
1735 if (code[i] >= prefix_len)
1736 FAIL;
1737 }
1738 code += prefix_len;
1739 }
1740 /* Validate the charset */
1741 if (flags & SRE_INFO_CHARSET) {
1742 if (!_validate_charset(code, newcode-1))
1743 FAIL;
1744 if (newcode[-1] != SRE_OP_FAILURE)
1745 FAIL;
1746 code = newcode;
1747 }
1748 else if (code != newcode) {
1749 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1750 FAIL;
1751 }
1752 }
1753 break;
1754
1755 case SRE_OP_BRANCH:
1756 {
1757 SRE_CODE *target = NULL;
1758 for (;;) {
1759 GET_SKIP;
1760 if (skip == 0)
1761 break;
1762 /* Stop 2 before the end; we check the JUMP below */
1763 if (!_validate_inner(code, code+skip-3, groups))
1764 FAIL;
1765 code += skip-3;
1766 /* Check that it ends with a JUMP, and that each JUMP
1767 has the same target */
1768 GET_OP;
1769 if (op != SRE_OP_JUMP)
1770 FAIL;
1771 GET_SKIP;
1772 if (target == NULL)
1773 target = code+skip-1;
1774 else if (code+skip-1 != target)
1775 FAIL;
1776 }
1777 }
1778 break;
1779
1780 case SRE_OP_REPEAT_ONE:
1781 case SRE_OP_MIN_REPEAT_ONE:
1782 {
1783 SRE_CODE min, max;
1784 GET_SKIP;
1785 GET_ARG; min = arg;
1786 GET_ARG; max = arg;
1787 if (min > max)
1788 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001789 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001790 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001791 if (!_validate_inner(code, code+skip-4, groups))
1792 FAIL;
1793 code += skip-4;
1794 GET_OP;
1795 if (op != SRE_OP_SUCCESS)
1796 FAIL;
1797 }
1798 break;
1799
1800 case SRE_OP_REPEAT:
1801 {
1802 SRE_CODE min, max;
1803 GET_SKIP;
1804 GET_ARG; min = arg;
1805 GET_ARG; max = arg;
1806 if (min > max)
1807 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001808 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001809 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001810 if (!_validate_inner(code, code+skip-3, groups))
1811 FAIL;
1812 code += skip-3;
1813 GET_OP;
1814 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1815 FAIL;
1816 }
1817 break;
1818
1819 case SRE_OP_GROUPREF:
1820 case SRE_OP_GROUPREF_IGNORE:
1821 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001822 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001823 FAIL;
1824 break;
1825
1826 case SRE_OP_GROUPREF_EXISTS:
1827 /* The regex syntax for this is: '(?(group)then|else)', where
1828 'group' is either an integer group number or a group name,
1829 'then' and 'else' are sub-regexes, and 'else' is optional. */
1830 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001831 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001832 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001833 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001834 code--; /* The skip is relative to the first arg! */
1835 /* There are two possibilities here: if there is both a 'then'
1836 part and an 'else' part, the generated code looks like:
1837
1838 GROUPREF_EXISTS
1839 <group>
1840 <skipyes>
1841 ...then part...
1842 JUMP
1843 <skipno>
1844 (<skipyes> jumps here)
1845 ...else part...
1846 (<skipno> jumps here)
1847
1848 If there is only a 'then' part, it looks like:
1849
1850 GROUPREF_EXISTS
1851 <group>
1852 <skip>
1853 ...then part...
1854 (<skip> jumps here)
1855
1856 There is no direct way to decide which it is, and we don't want
1857 to allow arbitrary jumps anywhere in the code; so we just look
1858 for a JUMP opcode preceding our skip target.
1859 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001860 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001861 code[skip-3] == SRE_OP_JUMP)
1862 {
1863 VTRACE(("both then and else parts present\n"));
1864 if (!_validate_inner(code+1, code+skip-3, groups))
1865 FAIL;
1866 code += skip-2; /* Position after JUMP, at <skipno> */
1867 GET_SKIP;
1868 if (!_validate_inner(code, code+skip-1, groups))
1869 FAIL;
1870 code += skip-1;
1871 }
1872 else {
1873 VTRACE(("only a then part present\n"));
1874 if (!_validate_inner(code+1, code+skip-1, groups))
1875 FAIL;
1876 code += skip-1;
1877 }
1878 break;
1879
1880 case SRE_OP_ASSERT:
1881 case SRE_OP_ASSERT_NOT:
1882 GET_SKIP;
1883 GET_ARG; /* 0 for lookahead, width for lookbehind */
1884 code--; /* Back up over arg to simplify math below */
1885 if (arg & 0x80000000)
1886 FAIL; /* Width too large */
1887 /* Stop 1 before the end; we check the SUCCESS below */
1888 if (!_validate_inner(code+1, code+skip-2, groups))
1889 FAIL;
1890 code += skip-2;
1891 GET_OP;
1892 if (op != SRE_OP_SUCCESS)
1893 FAIL;
1894 break;
1895
1896 default:
1897 FAIL;
1898
1899 }
1900 }
1901
1902 VTRACE(("okay\n"));
1903 return 1;
1904}
1905
1906static int
1907_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1908{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001909 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1910 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001911 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001912 return _validate_inner(code, end-1, groups);
1913}
1914
1915static int
1916_validate(PatternObject *self)
1917{
1918 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1919 {
1920 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1921 return 0;
1922 }
1923 else
1924 VTRACE(("Success!\n"));
1925 return 1;
1926}
1927
1928/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001929/* match methods */
1930
1931static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001932match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001933{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001934 Py_XDECREF(self->regs);
1935 Py_XDECREF(self->string);
1936 Py_DECREF(self->pattern);
1937 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001938}
1939
1940static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001941match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001942{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001943 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001944 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001945 Py_buffer view;
1946 PyObject *result;
1947 void* ptr;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001948 Py_ssize_t i, j;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001949
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001950 if (index < 0 || index >= self->groups) {
1951 /* raise IndexError if we were given a bad group number */
1952 PyErr_SetString(
1953 PyExc_IndexError,
1954 "no such group"
1955 );
1956 return NULL;
1957 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001958
Fredrik Lundh6f013982000-07-03 18:44:21 +00001959 index *= 2;
1960
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001961 if (self->string == Py_None || self->mark[index] < 0) {
1962 /* return default value if the string or group is undefined */
1963 Py_INCREF(def);
1964 return def;
1965 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001966
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001967 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001968 if (ptr == NULL)
1969 return NULL;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001970
1971 i = self->mark[index];
1972 j = self->mark[index+1];
1973 i = Py_MIN(i, length);
1974 j = Py_MIN(j, length);
1975 result = getslice(isbytes, ptr, self->string, i, j);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001976 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001977 PyBuffer_Release(&view);
1978 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001979}
1980
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001981static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001982match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001983{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001984 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001985
Guido van Rossumddefaf32007-01-14 03:31:43 +00001986 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001987 /* Default value */
1988 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001989
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001990 if (PyIndex_Check(index)) {
1991 return PyNumber_AsSsize_t(index, NULL);
1992 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001993
Fredrik Lundh6f013982000-07-03 18:44:21 +00001994 i = -1;
1995
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001996 if (self->pattern->groupindex) {
1997 index = PyObject_GetItem(self->pattern->groupindex, index);
1998 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00001999 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002000 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002001 Py_DECREF(index);
2002 } else
2003 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002004 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002005
2006 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002007}
2008
2009static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002010match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002011{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002012 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002013}
2014
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002015/*[clinic input]
2016_sre.SRE_Match.expand
2017
2018 template: object
2019
2020Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2021[clinic start generated code]*/
2022
2023static PyObject *
2024_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2025/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002026{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002027 /* delegate to Python code */
2028 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002029 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002030 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002031 );
2032}
2033
2034static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002035match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002036{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002037 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002038 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002039
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002040 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002041
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002042 switch (size) {
2043 case 0:
2044 result = match_getslice(self, Py_False, Py_None);
2045 break;
2046 case 1:
2047 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2048 break;
2049 default:
2050 /* fetch multiple items */
2051 result = PyTuple_New(size);
2052 if (!result)
2053 return NULL;
2054 for (i = 0; i < size; i++) {
2055 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002056 self, PyTuple_GET_ITEM(args, i), Py_None
2057 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002058 if (!item) {
2059 Py_DECREF(result);
2060 return NULL;
2061 }
2062 PyTuple_SET_ITEM(result, i, item);
2063 }
2064 break;
2065 }
2066 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002067}
2068
Eric V. Smith605bdae2016-09-11 08:55:43 -04002069static PyObject*
2070match_getitem(MatchObject* self, PyObject* name)
2071{
2072 return match_getslice(self, name, Py_None);
2073}
2074
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002075/*[clinic input]
2076_sre.SRE_Match.groups
2077
2078 default: object = None
2079 Is used for groups that did not participate in the match.
2080
2081Return a tuple containing all the subgroups of the match, from 1.
2082[clinic start generated code]*/
2083
2084static PyObject *
2085_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2086/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002087{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002088 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002089 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002090
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002091 result = PyTuple_New(self->groups-1);
2092 if (!result)
2093 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002094
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002095 for (index = 1; index < self->groups; index++) {
2096 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002097 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002098 if (!item) {
2099 Py_DECREF(result);
2100 return NULL;
2101 }
2102 PyTuple_SET_ITEM(result, index-1, item);
2103 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002104
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002105 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002106}
2107
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002108/*[clinic input]
2109_sre.SRE_Match.groupdict
2110
2111 default: object = None
2112 Is used for groups that did not participate in the match.
2113
2114Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2115[clinic start generated code]*/
2116
2117static PyObject *
2118_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2119/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002120{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002121 PyObject* result;
2122 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002123 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002124
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002125 result = PyDict_New();
2126 if (!result || !self->pattern->groupindex)
2127 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002128
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002129 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002130 if (!keys)
2131 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002132
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002133 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002134 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002135 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002136 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002137 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002138 if (!key)
2139 goto failed;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002140 value = match_getslice(self, key, default_value);
Benjamin Peterson3a27b082016-08-15 22:01:41 -07002141 if (!value)
Fredrik Lundh770617b2001-01-14 15:06:11 +00002142 goto failed;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002143 status = PyDict_SetItem(result, key, value);
2144 Py_DECREF(value);
2145 if (status < 0)
2146 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002147 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002148
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002149 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002150
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002151 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002152
2153failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002154 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002155 Py_DECREF(result);
2156 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002157}
2158
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002159/*[clinic input]
2160_sre.SRE_Match.start -> Py_ssize_t
2161
2162 group: object(c_default="NULL") = 0
2163 /
2164
2165Return index of the start of the substring matched by group.
2166[clinic start generated code]*/
2167
2168static Py_ssize_t
2169_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2170/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002171{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002172 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002174 if (index < 0 || index >= self->groups) {
2175 PyErr_SetString(
2176 PyExc_IndexError,
2177 "no such group"
2178 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002179 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002180 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002181
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002182 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002183 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002184}
2185
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002186/*[clinic input]
2187_sre.SRE_Match.end -> Py_ssize_t
2188
2189 group: object(c_default="NULL") = 0
2190 /
2191
2192Return index of the end of the substring matched by group.
2193[clinic start generated code]*/
2194
2195static Py_ssize_t
2196_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2197/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002198{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002199 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002200
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002201 if (index < 0 || index >= self->groups) {
2202 PyErr_SetString(
2203 PyExc_IndexError,
2204 "no such group"
2205 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002206 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002208
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002209 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002210 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002211}
2212
2213LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002214_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002215{
2216 PyObject* pair;
2217 PyObject* item;
2218
2219 pair = PyTuple_New(2);
2220 if (!pair)
2221 return NULL;
2222
Christian Heimes217cfd12007-12-02 14:31:20 +00002223 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002224 if (!item)
2225 goto error;
2226 PyTuple_SET_ITEM(pair, 0, item);
2227
Christian Heimes217cfd12007-12-02 14:31:20 +00002228 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002229 if (!item)
2230 goto error;
2231 PyTuple_SET_ITEM(pair, 1, item);
2232
2233 return pair;
2234
2235 error:
2236 Py_DECREF(pair);
2237 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002238}
2239
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002240/*[clinic input]
2241_sre.SRE_Match.span
2242
2243 group: object(c_default="NULL") = 0
2244 /
2245
2246For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2247[clinic start generated code]*/
2248
2249static PyObject *
2250_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2251/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002252{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002253 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002254
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002255 if (index < 0 || index >= self->groups) {
2256 PyErr_SetString(
2257 PyExc_IndexError,
2258 "no such group"
2259 );
2260 return NULL;
2261 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002262
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002263 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002264 return _pair(self->mark[index*2], self->mark[index*2+1]);
2265}
2266
2267static PyObject*
2268match_regs(MatchObject* self)
2269{
2270 PyObject* regs;
2271 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002272 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002273
2274 regs = PyTuple_New(self->groups);
2275 if (!regs)
2276 return NULL;
2277
2278 for (index = 0; index < self->groups; index++) {
2279 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2280 if (!item) {
2281 Py_DECREF(regs);
2282 return NULL;
2283 }
2284 PyTuple_SET_ITEM(regs, index, item);
2285 }
2286
2287 Py_INCREF(regs);
2288 self->regs = regs;
2289
2290 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002291}
2292
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002293/*[clinic input]
2294_sre.SRE_Match.__copy__
2295
2296[clinic start generated code]*/
2297
2298static PyObject *
2299_sre_SRE_Match___copy___impl(MatchObject *self)
2300/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002301{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002302#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002303 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002304 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002305
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002306 slots = 2 * (self->pattern->groups+1);
2307
2308 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2309 if (!copy)
2310 return NULL;
2311
2312 /* this value a constant, but any compiler should be able to
2313 figure that out all by itself */
2314 offset = offsetof(MatchObject, string);
2315
2316 Py_XINCREF(self->pattern);
2317 Py_XINCREF(self->string);
2318 Py_XINCREF(self->regs);
2319
2320 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002321 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002322
2323 return (PyObject*) copy;
2324#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002325 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002326 return NULL;
2327#endif
2328}
2329
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002330/*[clinic input]
2331_sre.SRE_Match.__deepcopy__
2332
2333 memo: object
2334
2335[clinic start generated code]*/
2336
2337static PyObject *
2338_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2339/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002340{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002341#ifdef USE_BUILTIN_COPY
2342 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002343
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002344 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002345 if (!copy)
2346 return NULL;
2347
2348 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2349 !deepcopy(&copy->string, memo) ||
2350 !deepcopy(&copy->regs, memo)) {
2351 Py_DECREF(copy);
2352 return NULL;
2353 }
2354
2355#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002356 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2357 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002358#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002359}
2360
/* Docstring for match objects; referenced as tp_doc by Match_Type. */
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002361PyDoc_STRVAR(match_doc,
2362"The result of re.match() and re.search().\n\
2363Match objects always have a boolean value of True.");
2364
/* Docstring for the "group" entry of match_methods. */
2365PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002366"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002367 Return subgroup(s) of the match by indices or names.\n\
2368 For 0 returns the entire match.");
2369
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002370static PyObject *
2371match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002372{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002373 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002374 return PyLong_FromSsize_t(self->lastindex);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002375 Py_RETURN_NONE;
Guido van Rossumb700df92000-03-31 14:59:30 +00002376}
2377
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002378static PyObject *
2379match_lastgroup_get(MatchObject *self)
2380{
2381 if (self->pattern->indexgroup && self->lastindex >= 0) {
2382 PyObject* result = PySequence_GetItem(
2383 self->pattern->indexgroup, self->lastindex
2384 );
2385 if (result)
2386 return result;
2387 PyErr_Clear();
2388 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002389 Py_RETURN_NONE;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002390}
2391
2392static PyObject *
2393match_regs_get(MatchObject *self)
2394{
2395 if (self->regs) {
2396 Py_INCREF(self->regs);
2397 return self->regs;
2398 } else
2399 return match_regs(self);
2400}
2401
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002402static PyObject *
2403match_repr(MatchObject *self)
2404{
2405 PyObject *result;
2406 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2407 if (group0 == NULL)
2408 return NULL;
2409 result = PyUnicode_FromFormat(
2410 "<%s object; span=(%d, %d), match=%.50R>",
2411 Py_TYPE(self)->tp_name,
2412 self->mark[0], self->mark[1], group0);
2413 Py_DECREF(group0);
2414 return result;
2415}
2416
2417
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002418static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002419pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002420{
2421 /* create match object (from state object) */
2422
2423 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002424 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002425 char* base;
2426 int n;
2427
2428 if (status > 0) {
2429
2430 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002431 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002432 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2433 2*(pattern->groups+1));
2434 if (!match)
2435 return NULL;
2436
2437 Py_INCREF(pattern);
2438 match->pattern = pattern;
2439
2440 Py_INCREF(state->string);
2441 match->string = state->string;
2442
2443 match->regs = NULL;
2444 match->groups = pattern->groups+1;
2445
2446 /* fill in group slices */
2447
2448 base = (char*) state->beginning;
2449 n = state->charsize;
2450
2451 match->mark[0] = ((char*) state->start - base) / n;
2452 match->mark[1] = ((char*) state->ptr - base) / n;
2453
2454 for (i = j = 0; i < pattern->groups; i++, j+=2)
2455 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2456 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2457 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2458 } else
2459 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2460
2461 match->pos = state->pos;
2462 match->endpos = state->endpos;
2463
2464 match->lastindex = state->lastindex;
2465
2466 return (PyObject*) match;
2467
2468 } else if (status == 0) {
2469
2470 /* no match */
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002471 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002472
2473 }
2474
2475 /* internal error */
2476 pattern_error(status);
2477 return NULL;
2478}
2479
2480
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002481/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002482/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002483
2484static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002485scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002486{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002487 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002488 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002489 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002490}
2491
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002492/*[clinic input]
2493_sre.SRE_Scanner.match
2494
2495[clinic start generated code]*/
2496
2497static PyObject *
2498_sre_SRE_Scanner_match_impl(ScannerObject *self)
2499/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002500{
2501 SRE_STATE* state = &self->state;
2502 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002503 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002504
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002505 if (state->start == NULL)
2506 Py_RETURN_NONE;
2507
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002508 state_reset(state);
2509
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002510 state->ptr = state->start;
2511
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002512 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002513 if (PyErr_Occurred())
2514 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002515
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002516 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002517 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002518
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002519 if (status == 0)
2520 state->start = NULL;
2521 else if (state->ptr != state->start)
2522 state->start = state->ptr;
2523 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002524 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002525 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002526 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002527
2528 return match;
2529}
2530
2531
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002532/*[clinic input]
2533_sre.SRE_Scanner.search
2534
2535[clinic start generated code]*/
2536
2537static PyObject *
2538_sre_SRE_Scanner_search_impl(ScannerObject *self)
2539/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002540{
2541 SRE_STATE* state = &self->state;
2542 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002543 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002544
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002545 if (state->start == NULL)
2546 Py_RETURN_NONE;
2547
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002548 state_reset(state);
2549
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002550 state->ptr = state->start;
2551
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002552 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002553 if (PyErr_Occurred())
2554 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002555
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002556 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002557 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002558
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002559 if (status == 0)
2560 state->start = NULL;
2561 else if (state->ptr != state->start)
2562 state->start = state->ptr;
2563 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002564 state->start = (void*) ((char*) state->ptr + state->charsize);
2565 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002566 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002567
2568 return match;
2569}
2570
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002571static PyObject *
2572pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002573{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002574 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002575
2576 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002577 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2578 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002579 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002580 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002581
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002582 /* create search state object */
2583 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2584 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002585 return NULL;
2586 }
2587
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002588 Py_INCREF(self);
2589 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002590
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002591 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002592}
2593
Victor Stinnerb44fb122016-11-21 16:35:08 +01002594static Py_hash_t
2595pattern_hash(PatternObject *self)
2596{
2597 Py_hash_t hash, hash2;
2598
2599 hash = PyObject_Hash(self->pattern);
2600 if (hash == -1) {
2601 return -1;
2602 }
2603
2604 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2605 hash ^= hash2;
2606
2607 hash ^= self->flags;
2608 hash ^= self->isbytes;
2609 hash ^= self->codesize;
2610
2611 if (hash == -1) {
2612 hash = -2;
2613 }
2614 return hash;
2615}
2616
2617static PyObject*
2618pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2619{
2620 PatternObject *left, *right;
2621 int cmp;
2622
2623 if (op != Py_EQ && op != Py_NE) {
2624 Py_RETURN_NOTIMPLEMENTED;
2625 }
2626
2627 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2628 Py_RETURN_NOTIMPLEMENTED;
2629 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002630
2631 if (lefto == righto) {
2632 /* a pattern is equal to itself */
2633 return PyBool_FromLong(op == Py_EQ);
2634 }
2635
Victor Stinnerb44fb122016-11-21 16:35:08 +01002636 left = (PatternObject *)lefto;
2637 right = (PatternObject *)righto;
2638
2639 cmp = (left->flags == right->flags
2640 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002641 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002642 if (cmp) {
2643 /* Compare the code and the pattern because the same pattern can
2644 produce different codes depending on the locale used to compile the
2645 pattern when the re.LOCALE flag is used. Don't compare groups,
2646 indexgroup nor groupindex: they are derivated from the pattern. */
2647 cmp = (memcmp(left->code, right->code,
2648 sizeof(left->code[0]) * left->codesize) == 0);
2649 }
2650 if (cmp) {
2651 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2652 Py_EQ);
2653 if (cmp < 0) {
2654 return NULL;
2655 }
2656 }
2657 if (op == Py_NE) {
2658 cmp = !cmp;
2659 }
2660 return PyBool_FromLong(cmp);
2661}
2662
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002663#include "clinic/_sre.c.h"
2664
2665static PyMethodDef pattern_methods[] = {
2666 _SRE_SRE_PATTERN_MATCH_METHODDEF
2667 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2668 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2669 _SRE_SRE_PATTERN_SUB_METHODDEF
2670 _SRE_SRE_PATTERN_SUBN_METHODDEF
2671 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2672 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2673 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2674 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2675 _SRE_SRE_PATTERN___COPY___METHODDEF
2676 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2677 {NULL, NULL}
2678};
2679
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002680static PyGetSetDef pattern_getset[] = {
2681 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2682 "A dictionary mapping group names to group numbers."},
2683 {NULL} /* Sentinel */
2684};
2685
2686#define PAT_OFF(x) offsetof(PatternObject, x)
2687static PyMemberDef pattern_members[] = {
2688 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2689 {"flags", T_INT, PAT_OFF(flags), READONLY},
2690 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2691 {NULL} /* Sentinel */
2692};
2693
2694static PyTypeObject Pattern_Type = {
2695 PyVarObject_HEAD_INIT(NULL, 0)
2696 "_" SRE_MODULE ".SRE_Pattern",
2697 sizeof(PatternObject), sizeof(SRE_CODE),
2698 (destructor)pattern_dealloc, /* tp_dealloc */
2699 0, /* tp_print */
2700 0, /* tp_getattr */
2701 0, /* tp_setattr */
2702 0, /* tp_reserved */
2703 (reprfunc)pattern_repr, /* tp_repr */
2704 0, /* tp_as_number */
2705 0, /* tp_as_sequence */
2706 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002707 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002708 0, /* tp_call */
2709 0, /* tp_str */
2710 0, /* tp_getattro */
2711 0, /* tp_setattro */
2712 0, /* tp_as_buffer */
2713 Py_TPFLAGS_DEFAULT, /* tp_flags */
2714 pattern_doc, /* tp_doc */
2715 0, /* tp_traverse */
2716 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002717 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002718 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2719 0, /* tp_iter */
2720 0, /* tp_iternext */
2721 pattern_methods, /* tp_methods */
2722 pattern_members, /* tp_members */
2723 pattern_getset, /* tp_getset */
2724};
2725
Eric V. Smith605bdae2016-09-11 08:55:43 -04002726/* Match objects do not support length or assignment, but do support
2727 __getitem__. */
2728static PyMappingMethods match_as_mapping = {
2729 NULL,
2730 (binaryfunc)match_getitem,
2731 NULL
2732};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002733
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002734static PyMethodDef match_methods[] = {
2735 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2736 _SRE_SRE_MATCH_START_METHODDEF
2737 _SRE_SRE_MATCH_END_METHODDEF
2738 _SRE_SRE_MATCH_SPAN_METHODDEF
2739 _SRE_SRE_MATCH_GROUPS_METHODDEF
2740 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2741 _SRE_SRE_MATCH_EXPAND_METHODDEF
2742 _SRE_SRE_MATCH___COPY___METHODDEF
2743 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2744 {NULL, NULL}
2745};
2746
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002747static PyGetSetDef match_getset[] = {
2748 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2749 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2750 {"regs", (getter)match_regs_get, (setter)NULL},
2751 {NULL}
2752};
2753
2754#define MATCH_OFF(x) offsetof(MatchObject, x)
2755static PyMemberDef match_members[] = {
2756 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2757 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2758 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2759 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2760 {NULL}
2761};
2762
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2765
2766static PyTypeObject Match_Type = {
2767 PyVarObject_HEAD_INIT(NULL,0)
2768 "_" SRE_MODULE ".SRE_Match",
2769 sizeof(MatchObject), sizeof(Py_ssize_t),
2770 (destructor)match_dealloc, /* tp_dealloc */
2771 0, /* tp_print */
2772 0, /* tp_getattr */
2773 0, /* tp_setattr */
2774 0, /* tp_reserved */
2775 (reprfunc)match_repr, /* tp_repr */
2776 0, /* tp_as_number */
2777 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002778 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002779 0, /* tp_hash */
2780 0, /* tp_call */
2781 0, /* tp_str */
2782 0, /* tp_getattro */
2783 0, /* tp_setattro */
2784 0, /* tp_as_buffer */
2785 Py_TPFLAGS_DEFAULT, /* tp_flags */
2786 match_doc, /* tp_doc */
2787 0, /* tp_traverse */
2788 0, /* tp_clear */
2789 0, /* tp_richcompare */
2790 0, /* tp_weaklistoffset */
2791 0, /* tp_iter */
2792 0, /* tp_iternext */
2793 match_methods, /* tp_methods */
2794 match_members, /* tp_members */
2795 match_getset, /* tp_getset */
2796};
2797
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002798static PyMethodDef scanner_methods[] = {
2799 _SRE_SRE_SCANNER_MATCH_METHODDEF
2800 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2801 {NULL, NULL}
2802};
2803
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002804#define SCAN_OFF(x) offsetof(ScannerObject, x)
2805static PyMemberDef scanner_members[] = {
2806 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2807 {NULL} /* Sentinel */
2808};
2809
2810static PyTypeObject Scanner_Type = {
2811 PyVarObject_HEAD_INIT(NULL, 0)
2812 "_" SRE_MODULE ".SRE_Scanner",
2813 sizeof(ScannerObject), 0,
2814 (destructor)scanner_dealloc,/* tp_dealloc */
2815 0, /* tp_print */
2816 0, /* tp_getattr */
2817 0, /* tp_setattr */
2818 0, /* tp_reserved */
2819 0, /* tp_repr */
2820 0, /* tp_as_number */
2821 0, /* tp_as_sequence */
2822 0, /* tp_as_mapping */
2823 0, /* tp_hash */
2824 0, /* tp_call */
2825 0, /* tp_str */
2826 0, /* tp_getattro */
2827 0, /* tp_setattro */
2828 0, /* tp_as_buffer */
2829 Py_TPFLAGS_DEFAULT, /* tp_flags */
2830 0, /* tp_doc */
2831 0, /* tp_traverse */
2832 0, /* tp_clear */
2833 0, /* tp_richcompare */
2834 0, /* tp_weaklistoffset */
2835 0, /* tp_iter */
2836 0, /* tp_iternext */
2837 scanner_methods, /* tp_methods */
2838 scanner_members, /* tp_members */
2839 0, /* tp_getset */
2840};
2841
Guido van Rossumb700df92000-03-31 14:59:30 +00002842static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002843 _SRE_COMPILE_METHODDEF
2844 _SRE_GETCODESIZE_METHODDEF
2845 _SRE_GETLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002846 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002847};
2848
Martin v. Löwis1a214512008-06-11 05:26:20 +00002849static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002850 PyModuleDef_HEAD_INIT,
2851 "_" SRE_MODULE,
2852 NULL,
2853 -1,
2854 _functions,
2855 NULL,
2856 NULL,
2857 NULL,
2858 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002859};
2860
2861PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002862{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002863 PyObject* m;
2864 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002865 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002866
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002867 /* Patch object types */
2868 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2869 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002870 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002871
Martin v. Löwis1a214512008-06-11 05:26:20 +00002872 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002873 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002874 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002875 d = PyModule_GetDict(m);
2876
Christian Heimes217cfd12007-12-02 14:31:20 +00002877 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002878 if (x) {
2879 PyDict_SetItemString(d, "MAGIC", x);
2880 Py_DECREF(x);
2881 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002882
Christian Heimes217cfd12007-12-02 14:31:20 +00002883 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002884 if (x) {
2885 PyDict_SetItemString(d, "CODESIZE", x);
2886 Py_DECREF(x);
2887 }
2888
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002889 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2890 if (x) {
2891 PyDict_SetItemString(d, "MAXREPEAT", x);
2892 Py_DECREF(x);
2893 }
2894
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002895 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2896 if (x) {
2897 PyDict_SetItemString(d, "MAXGROUPS", x);
2898 Py_DECREF(x);
2899 }
2900
Neal Norwitzfe537132007-08-26 03:55:15 +00002901 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002902 if (x) {
2903 PyDict_SetItemString(d, "copyright", x);
2904 Py_DECREF(x);
2905 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002906 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002907}
2908
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002909/* vim:ts=4:sw=4:et
2910*/