blob: d09249672f8bc3615581c13d96366c4b864c76ac [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */

37
/* Copyright banner string for the SRE engine. */
static const char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
/* Name of this module, minus the leading underscore.  A build may
   predefine SRE_MODULE; "sre" is only the fallback value. */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

/* NOTE(review): presumably the name of the Python-level front-end module;
   its uses are outside this section -- confirm. */
#define SRE_PY_MODULE "re"

58
/* Tracing switch: define VERBOSE (rather than undefining it) to make the
   TRACE() macro print. */
#undef VERBOSE

/* -------------------------------------------------------------------- */
/* optional features */

/* USE_BUILTIN_COPY enables copy/deepcopy handling (work in progress);
   it is left undefined here. */
#undef USE_BUILTIN_COPY

67
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000068/* -------------------------------------------------------------------- */
69
/* LOCAL(type) opens the definition of a file-local helper returning
   `type`.  The storage class is always static; the inline/calling
   convention decoration depends on the compiler. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
/* opt-in: plain inline helpers */
#define LOCAL(type) static inline type
#else
/* default: ordinary static functions */
#define LOCAL(type) static type
#endif
80
/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL         -1  /* illegal opcode */
#define SRE_ERROR_STATE           -2  /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3  /* runaway recursion */
#define SRE_ERROR_MEMORY          -9  /* out of memory */
#define SRE_ERROR_INTERRUPTED     -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000087
/* TRACE(v) takes a parenthesized printf argument list, for example
   TRACE(("x=%d\n", x)).  It prints only when VERBOSE is defined; otherwise
   it expands to nothing, so its arguments are not evaluated. */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
93
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000094/* -------------------------------------------------------------------- */
95/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000096
/* Locale-independent predicates.  Values of 128 and above never satisfy
   the digit/space/alnum/word tests; the line break test accepts only
   '\n'. */
#define SRE_IS_DIGIT(ch) ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_ALNUM(ch) ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch) ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +0000107
/* Lowercase `ch` with Py_TOLOWER when it is below 128; every other value
   is handed back untouched. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch < 128)
        return Py_TOLOWER(ch);
    return ch;
}
112
/* Uppercase `ch` with Py_TOUPPER when it is below 128; every other value
   is handed back untouched. */
static unsigned int sre_upper(unsigned int ch)
{
    if (ch < 128)
        return Py_TOUPPER(ch);
    return ch;
}
117
/* locale-specific character predicates */
/* The range test is spelled !(c & ~255) instead of (c < 256): both agree
 * for any unsigned c, and the mask form avoids compiler warnings when
 * c's type cannot hold values of 256 or more.  Values outside 0..255
 * are never alphanumeric here. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
/* word character: locale alphanumeric, or underscore */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
123
/* Lowercase `ch` with the C library's tolower() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower(ch);
}
128
/* Uppercase `ch` with the C library's toupper() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper(ch);
}
133
/* unicode-specific character predicates: thin aliases for the
   Py_UNICODE_* classification macros */

#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
/* word character: unicode alphanumeric, or underscore */
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000141
/* Lowercase `ch` through Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
146
/* Uppercase `ch` through Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
151
Guido van Rossumb700df92000-03-31 14:59:30 +0000152LOCAL(int)
153sre_category(SRE_CODE category, unsigned int ch)
154{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000155 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000157 case SRE_CATEGORY_DIGIT:
158 return SRE_IS_DIGIT(ch);
159 case SRE_CATEGORY_NOT_DIGIT:
160 return !SRE_IS_DIGIT(ch);
161 case SRE_CATEGORY_SPACE:
162 return SRE_IS_SPACE(ch);
163 case SRE_CATEGORY_NOT_SPACE:
164 return !SRE_IS_SPACE(ch);
165 case SRE_CATEGORY_WORD:
166 return SRE_IS_WORD(ch);
167 case SRE_CATEGORY_NOT_WORD:
168 return !SRE_IS_WORD(ch);
169 case SRE_CATEGORY_LINEBREAK:
170 return SRE_IS_LINEBREAK(ch);
171 case SRE_CATEGORY_NOT_LINEBREAK:
172 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000174 case SRE_CATEGORY_LOC_WORD:
175 return SRE_LOC_IS_WORD(ch);
176 case SRE_CATEGORY_LOC_NOT_WORD:
177 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000178
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000179 case SRE_CATEGORY_UNI_DIGIT:
180 return SRE_UNI_IS_DIGIT(ch);
181 case SRE_CATEGORY_UNI_NOT_DIGIT:
182 return !SRE_UNI_IS_DIGIT(ch);
183 case SRE_CATEGORY_UNI_SPACE:
184 return SRE_UNI_IS_SPACE(ch);
185 case SRE_CATEGORY_UNI_NOT_SPACE:
186 return !SRE_UNI_IS_SPACE(ch);
187 case SRE_CATEGORY_UNI_WORD:
188 return SRE_UNI_IS_WORD(ch);
189 case SRE_CATEGORY_UNI_NOT_WORD:
190 return !SRE_UNI_IS_WORD(ch);
191 case SRE_CATEGORY_UNI_LINEBREAK:
192 return SRE_UNI_IS_LINEBREAK(ch);
193 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
194 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 }
196 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000197}
198
199/* helpers */
200
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000201static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000202data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000203{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000204 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000205 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000206 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000207 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000208 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000209}
210
211static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000212data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000213{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000214 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000215 minsize = state->data_stack_base+size;
216 cursize = state->data_stack_size;
217 if (cursize < minsize) {
218 void* stack;
219 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300220 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000222 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000223 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000224 return SRE_ERROR_MEMORY;
225 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000226 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000227 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000228 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000229 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000230}
231
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000232/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000233
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300234#define SRE_CHAR Py_UCS1
235#define SIZEOF_SRE_CHAR 1
236#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300237#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000238
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300239/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000240
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300241#define SRE_CHAR Py_UCS2
242#define SIZEOF_SRE_CHAR 2
243#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300244#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000245
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300246/* generate 32-bit unicode version */
247
248#define SRE_CHAR Py_UCS4
249#define SIZEOF_SRE_CHAR 4
250#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300251#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000252
253/* -------------------------------------------------------------------- */
254/* factories and destructors */
255
256/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100257static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300258static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000259
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300260
261/*[clinic input]
262module _sre
263class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
264class _sre.SRE_Match "MatchObject *" "&Match_Type"
265class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
266[clinic start generated code]*/
267/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
268
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700269static PyTypeObject Pattern_Type;
270static PyTypeObject Match_Type;
271static PyTypeObject Scanner_Type;
272
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300273/*[clinic input]
274_sre.getcodesize -> int
275[clinic start generated code]*/
276
277static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300278_sre_getcodesize_impl(PyObject *module)
279/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000280{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300281 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000282}
283
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300284/*[clinic input]
285_sre.getlower -> int
286
287 character: int
288 flags: int
289 /
290
291[clinic start generated code]*/
292
293static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300294_sre_getlower_impl(PyObject *module, int character, int flags)
295/*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000296{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000297 if (flags & SRE_FLAG_LOCALE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300298 return sre_lower_locale(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000299 if (flags & SRE_FLAG_UNICODE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300300 return sre_lower_unicode(character);
301 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000302}
303
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000304LOCAL(void)
305state_reset(SRE_STATE* state)
306{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000308 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000309
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000310 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000311 state->lastindex = -1;
312
313 state->repeat = NULL;
314
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000315 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000316}
317
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000318static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300320 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600321 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000322{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000323 /* given a python object, return a data pointer, a length (in
324 characters), and a character size. return NULL if the object
325 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000326
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000327 /* Unicode objects do not support the buffer API. So, get the data
328 directly instead. */
329 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200330 if (PyUnicode_READY(string) == -1)
331 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200332 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200333 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300334 *p_isbytes = 0;
335 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000336 }
337
Victor Stinner0058b862011-09-29 03:27:47 +0200338 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300339 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200340 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300341 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000342 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 *p_length = view->len;
345 *p_charsize = 1;
346 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000347
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300348 if (view->buf == NULL) {
349 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
350 PyBuffer_Release(view);
351 view->buf = NULL;
352 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300354 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000355}
356
357LOCAL(PyObject*)
358state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000359 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000360{
361 /* prepare state object */
362
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000363 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300364 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000365 void* ptr;
366
367 memset(state, 0, sizeof(SRE_STATE));
368
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300369 state->mark = PyMem_New(void *, pattern->groups * 2);
370 if (!state->mark) {
371 PyErr_NoMemory();
372 goto err;
373 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000374 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000375 state->lastindex = -1;
376
Benjamin Petersone48944b2012-03-07 14:50:25 -0600377 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300378 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000379 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600380 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000381
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600383 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200384 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600385 goto err;
386 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300387 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600388 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200389 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600390 goto err;
391 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 /* adjust boundaries */
394 if (start < 0)
395 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000396 else if (start > length)
397 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 if (end < 0)
400 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000401 else if (end > length)
402 end = length;
403
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300404 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000405 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 state->start = (void*) ((char*) ptr + start * state->charsize);
410 state->end = (void*) ((char*) ptr + end * state->charsize);
411
412 Py_INCREF(string);
413 state->string = string;
414 state->pos = start;
415 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000416
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200417 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000418 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200419 state->upper = sre_upper_locale;
420 }
421 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000422 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200423 state->upper = sre_upper_unicode;
424 }
425 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000426 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200427 state->upper = sre_upper;
428 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600431 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300432 PyMem_Del(state->mark);
433 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600434 if (state->buffer.buf)
435 PyBuffer_Release(&state->buffer);
436 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000437}
438
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000439LOCAL(void)
440state_fini(SRE_STATE* state)
441{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600442 if (state->buffer.buf)
443 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000445 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300446 PyMem_Del(state->mark);
447 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000448}
449
/* Convert `member`, a pointer into the subject data, to a character index
   counted from the start of the string: the byte distance from
   state->beginning divided by the character size. */
#define STATE_OFFSET(state, member) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
453
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000454LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300455getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300456 PyObject* string, Py_ssize_t start, Py_ssize_t end)
457{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300458 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300459 if (PyBytes_CheckExact(string) &&
460 start == 0 && end == PyBytes_GET_SIZE(string)) {
461 Py_INCREF(string);
462 return string;
463 }
464 return PyBytes_FromStringAndSize(
465 (const char *)ptr + start, end - start);
466 }
467 else {
468 return PyUnicode_Substring(string, start, end);
469 }
470}
471
472LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000473state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000474{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000475 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000476
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000477 index = (index - 1) * 2;
478
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000479 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000480 if (empty)
481 /* want empty string */
482 i = j = 0;
483 else {
484 Py_INCREF(Py_None);
485 return Py_None;
486 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000487 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000488 i = STATE_OFFSET(state, state->mark[index]);
489 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000490 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000491
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300492 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000493}
494
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000495static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100496pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000497{
498 switch (status) {
499 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400500 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000501 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400502 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000503 "maximum recursion limit exceeded"
504 );
505 break;
506 case SRE_ERROR_MEMORY:
507 PyErr_NoMemory();
508 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000509 case SRE_ERROR_INTERRUPTED:
510 /* An exception has already been raised, so let it fly */
511 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000512 default:
513 /* other error codes indicate compiler/engine bugs */
514 PyErr_SetString(
515 PyExc_RuntimeError,
516 "internal error in regular expression engine"
517 );
518 }
519}
520
Guido van Rossumb700df92000-03-31 14:59:30 +0000521static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000522pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000523{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000524 if (self->weakreflist != NULL)
525 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000526 Py_XDECREF(self->pattern);
527 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000528 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000529 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000530}
531
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300532LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300533sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300534{
535 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300536 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300537 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300538 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300539 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300540 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300541}
542
543LOCAL(Py_ssize_t)
544sre_search(SRE_STATE* state, SRE_CODE* pattern)
545{
546 if (state->charsize == 1)
547 return sre_ucs1_search(state, pattern);
548 if (state->charsize == 2)
549 return sre_ucs2_search(state, pattern);
550 assert(state->charsize == 4);
551 return sre_ucs4_search(state, pattern);
552}
553
Larry Hastings16c51912014-01-07 11:53:01 -0800554static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200555fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
556{
557 if (string2 != NULL) {
558 if (string != NULL) {
559 PyErr_Format(PyExc_TypeError,
560 "Argument given by name ('%s') and position (1)",
561 oldname);
562 return NULL;
563 }
564 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
565 "The '%s' keyword parameter name is deprecated. "
566 "Use 'string' instead.", oldname) < 0)
567 return NULL;
568 return string2;
569 }
570 if (string == NULL) {
571 PyErr_SetString(PyExc_TypeError,
572 "Required argument 'string' (pos 1) not found");
573 return NULL;
574 }
575 return string;
576}
Larry Hastings16c51912014-01-07 11:53:01 -0800577
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300578/*[clinic input]
579_sre.SRE_Pattern.match
580
581 string: object = NULL
582 pos: Py_ssize_t = 0
583 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
584 *
585 pattern: object = NULL
586
587Matches zero or more characters at the beginning of the string.
588[clinic start generated code]*/
589
Larry Hastings16c51912014-01-07 11:53:01 -0800590static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300591_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
592 Py_ssize_t pos, Py_ssize_t endpos,
593 PyObject *pattern)
594/*[clinic end generated code: output=74b4b1da3bb2d84e input=3d079aa99979b81d]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800595{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000596 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100597 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300598 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000599
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200600 string = fix_string_param(string, pattern, "pattern");
601 if (!string)
602 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300603 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000604 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000606 state.ptr = state.start;
607
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000608 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
609
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300610 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000611
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000612 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300613 if (PyErr_Occurred()) {
614 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000615 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300616 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000617
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300618 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000619 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300620 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000621}
622
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300623/*[clinic input]
624_sre.SRE_Pattern.fullmatch
625
626 string: object = NULL
627 pos: Py_ssize_t = 0
628 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
629 *
630 pattern: object = NULL
631
632Matches against all of the string
633[clinic start generated code]*/
634
635static PyObject *
636_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
637 Py_ssize_t pos, Py_ssize_t endpos,
638 PyObject *pattern)
639/*[clinic end generated code: output=1c98bc5da744ea94 input=d4228606cc12580f]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200640{
641 SRE_STATE state;
642 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300643 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200644
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300645 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200646 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200647 return NULL;
648
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300649 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200650 return NULL;
651
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200652 state.ptr = state.start;
653
654 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
655
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300656 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200657
658 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300659 if (PyErr_Occurred()) {
660 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200661 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300662 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200663
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300664 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200665 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300666 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200667}
668
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300669/*[clinic input]
670_sre.SRE_Pattern.search
671
672 string: object = NULL
673 pos: Py_ssize_t = 0
674 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
675 *
676 pattern: object = NULL
677
678Scan through string looking for a match, and return a corresponding match object instance.
679
680Return None if no position in the string matches.
681[clinic start generated code]*/
682
683static PyObject *
684_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
685 Py_ssize_t pos, Py_ssize_t endpos,
686 PyObject *pattern)
687/*[clinic end generated code: output=3839394a18e5ea4f input=dab42720f4be3a4b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000688{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000689 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100690 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300691 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000692
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300693 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200694 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000696
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300697 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000698 return NULL;
699
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000700 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
701
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300702 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000703
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000704 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
705
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300706 if (PyErr_Occurred()) {
707 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000708 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300709 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000710
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300711 match = pattern_new_match(self, &state, status);
712 state_fini(&state);
713 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000714}
715
716static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200717call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000718{
719 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000720 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721 PyObject* func;
722 PyObject* result;
723
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000724 if (!args)
725 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000726 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000727 if (!name)
728 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000729 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000730 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000731 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000732 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000733 func = PyObject_GetAttrString(mod, function);
734 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000735 if (!func)
736 return NULL;
737 result = PyObject_CallObject(func, args);
738 Py_DECREF(func);
739 Py_DECREF(args);
740 return result;
741}
742
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000743#ifdef USE_BUILTIN_COPY
744static int
745deepcopy(PyObject** object, PyObject* memo)
746{
747 PyObject* copy;
748
749 copy = call(
750 "copy", "deepcopy",
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000751 PyTuple_Pack(2, *object, memo)
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000752 );
753 if (!copy)
754 return 0;
755
Serhiy Storchaka57a01d32016-04-10 18:05:40 +0300756 Py_SETREF(*object, copy);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000757
758 return 1; /* success */
759}
760#endif
761
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300762/*[clinic input]
763_sre.SRE_Pattern.findall
764
765 string: object = NULL
766 pos: Py_ssize_t = 0
767 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
768 *
769 source: object = NULL
770
771Return a list of all non-overlapping matches of pattern in string.
772[clinic start generated code]*/
773
774static PyObject *
775_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
776 Py_ssize_t pos, Py_ssize_t endpos,
777 PyObject *source)
778/*[clinic end generated code: output=51295498b300639d input=df688355c056b9de]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000779{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000780 SRE_STATE state;
781 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100782 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000783 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000784
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300785 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200786 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000787 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000788
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300789 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000790 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000791
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000792 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000793 if (!list) {
794 state_fini(&state);
795 return NULL;
796 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000797
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000798 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000799
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000800 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000801
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000802 state_reset(&state);
803
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000804 state.ptr = state.start;
805
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300806 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300807 if (PyErr_Occurred())
808 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000809
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000810 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000811 if (status == 0)
812 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000813 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000814 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000815 }
Tim Peters3d563502006-01-21 02:47:53 +0000816
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000817 /* don't bother to build a match object */
818 switch (self->groups) {
819 case 0:
820 b = STATE_OFFSET(&state, state.start);
821 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300822 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300823 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000824 if (!item)
825 goto error;
826 break;
827 case 1:
828 item = state_getslice(&state, 1, string, 1);
829 if (!item)
830 goto error;
831 break;
832 default:
833 item = PyTuple_New(self->groups);
834 if (!item)
835 goto error;
836 for (i = 0; i < self->groups; i++) {
837 PyObject* o = state_getslice(&state, i+1, string, 1);
838 if (!o) {
839 Py_DECREF(item);
840 goto error;
841 }
842 PyTuple_SET_ITEM(item, i, o);
843 }
844 break;
845 }
846
847 status = PyList_Append(list, item);
848 Py_DECREF(item);
849 if (status < 0)
850 goto error;
851
852 if (state.ptr == state.start)
853 state.start = (void*) ((char*) state.ptr + state.charsize);
854 else
855 state.start = state.ptr;
856
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000857 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000858
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000859 state_fini(&state);
860 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000861
862error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000863 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000864 state_fini(&state);
865 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000866
Guido van Rossumb700df92000-03-31 14:59:30 +0000867}
868
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300869/*[clinic input]
870_sre.SRE_Pattern.finditer
871
872 string: object
873 pos: Py_ssize_t = 0
874 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
875
876Return an iterator over all non-overlapping matches for the RE pattern in string.
877
878For each match, the iterator returns a match object.
879[clinic start generated code]*/
880
881static PyObject *
882_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
883 Py_ssize_t pos, Py_ssize_t endpos)
884/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000885{
886 PyObject* scanner;
887 PyObject* search;
888 PyObject* iterator;
889
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300890 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000891 if (!scanner)
892 return NULL;
893
894 search = PyObject_GetAttrString(scanner, "search");
895 Py_DECREF(scanner);
896 if (!search)
897 return NULL;
898
899 iterator = PyCallIter_New(search, Py_None);
900 Py_DECREF(search);
901
902 return iterator;
903}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000904
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300905/*[clinic input]
906_sre.SRE_Pattern.scanner
907
908 string: object
909 pos: Py_ssize_t = 0
910 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
911
912[clinic start generated code]*/
913
914static PyObject *
915_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
916 Py_ssize_t pos, Py_ssize_t endpos)
917/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
918{
919 return pattern_scanner(self, string, pos, endpos);
920}
921
922/*[clinic input]
923_sre.SRE_Pattern.split
924
925 string: object = NULL
926 maxsplit: Py_ssize_t = 0
927 *
928 source: object = NULL
929
930Split string by the occurrences of pattern.
931[clinic start generated code]*/
932
933static PyObject *
934_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
935 Py_ssize_t maxsplit, PyObject *source)
936/*[clinic end generated code: output=20bac2ff55b9f84c input=41e0b2e35e599d7b]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000937{
938 SRE_STATE state;
939 PyObject* list;
940 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100941 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000942 Py_ssize_t n;
943 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000944 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000945
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300946 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200947 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000948 return NULL;
949
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200950 assert(self->codesize != 0);
951 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
952 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
953 PyErr_SetString(PyExc_ValueError,
954 "split() requires a non-empty pattern match.");
955 return NULL;
956 }
957 if (PyErr_WarnEx(PyExc_FutureWarning,
958 "split() requires a non-empty pattern match.",
959 1) < 0)
960 return NULL;
961 }
962
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300963 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000964 return NULL;
965
966 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000967 if (!list) {
968 state_fini(&state);
969 return NULL;
970 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000971
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000972 n = 0;
973 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000974
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000975 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000976
977 state_reset(&state);
978
979 state.ptr = state.start;
980
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300981 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300982 if (PyErr_Occurred())
983 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000984
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000985 if (status <= 0) {
986 if (status == 0)
987 break;
988 pattern_error(status);
989 goto error;
990 }
Tim Peters3d563502006-01-21 02:47:53 +0000991
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000992 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300993 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000994 break;
995 /* skip one character */
996 state.start = (void*) ((char*) state.ptr + state.charsize);
997 continue;
998 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000999
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001000 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001001 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001002 string, STATE_OFFSET(&state, last),
1003 STATE_OFFSET(&state, state.start)
1004 );
1005 if (!item)
1006 goto error;
1007 status = PyList_Append(list, item);
1008 Py_DECREF(item);
1009 if (status < 0)
1010 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001011
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001012 /* add groups (if any) */
1013 for (i = 0; i < self->groups; i++) {
1014 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001015 if (!item)
1016 goto error;
1017 status = PyList_Append(list, item);
1018 Py_DECREF(item);
1019 if (status < 0)
1020 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001021 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001022
1023 n = n + 1;
1024
1025 last = state.start = state.ptr;
1026
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001027 }
1028
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001029 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001030 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001031 string, STATE_OFFSET(&state, last), state.endpos
1032 );
1033 if (!item)
1034 goto error;
1035 status = PyList_Append(list, item);
1036 Py_DECREF(item);
1037 if (status < 0)
1038 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001039
1040 state_fini(&state);
1041 return list;
1042
1043error:
1044 Py_DECREF(list);
1045 state_fini(&state);
1046 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001047
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001048}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001049
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001050static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001051pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001052 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001053{
1054 SRE_STATE state;
1055 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001056 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001057 PyObject* item;
1058 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001059 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001060 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001061 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001062 Py_ssize_t n;
1063 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001064 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001065 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001066 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001067
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001068 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001069 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001070 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001071 Py_INCREF(filter);
1072 filter_is_callable = 1;
1073 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001074 /* if not callable, check if it's a literal string */
1075 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001076 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001077 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001078 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001079 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001080 if (charsize == 1)
1081 literal = memchr(ptr, '\\', n) == NULL;
1082 else
1083 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001084 } else {
1085 PyErr_Clear();
1086 literal = 0;
1087 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001088 if (view.buf)
1089 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001090 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001091 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001092 Py_INCREF(filter);
1093 filter_is_callable = 0;
1094 } else {
1095 /* not a literal; hand it over to the template compiler */
1096 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001097 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001098 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001099 );
1100 if (!filter)
1101 return NULL;
1102 filter_is_callable = PyCallable_Check(filter);
1103 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001104 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001105
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001106 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001107 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001108 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001109 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001110
1111 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001112 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001113 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001114 state_fini(&state);
1115 return NULL;
1116 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001117
1118 n = i = 0;
1119
1120 while (!count || n < count) {
1121
1122 state_reset(&state);
1123
1124 state.ptr = state.start;
1125
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001126 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001127 if (PyErr_Occurred())
1128 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001129
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001130 if (status <= 0) {
1131 if (status == 0)
1132 break;
1133 pattern_error(status);
1134 goto error;
1135 }
Tim Peters3d563502006-01-21 02:47:53 +00001136
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001137 b = STATE_OFFSET(&state, state.start);
1138 e = STATE_OFFSET(&state, state.ptr);
1139
1140 if (i < b) {
1141 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001142 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001143 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001144 if (!item)
1145 goto error;
1146 status = PyList_Append(list, item);
1147 Py_DECREF(item);
1148 if (status < 0)
1149 goto error;
1150
1151 } else if (i == b && i == e && n > 0)
1152 /* ignore empty match on latest position */
1153 goto next;
1154
1155 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001156 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001157 match = pattern_new_match(self, &state, 1);
1158 if (!match)
1159 goto error;
Victor Stinner559bb6a2016-08-22 22:48:54 +02001160 item = _PyObject_CallArg1(filter, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001161 Py_DECREF(match);
1162 if (!item)
1163 goto error;
1164 } else {
1165 /* filter is literal string */
1166 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001167 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001168 }
1169
1170 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001171 if (item != Py_None) {
1172 status = PyList_Append(list, item);
1173 Py_DECREF(item);
1174 if (status < 0)
1175 goto error;
1176 }
Tim Peters3d563502006-01-21 02:47:53 +00001177
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001178 i = e;
1179 n = n + 1;
1180
1181next:
1182 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001183 if (state.ptr == state.end)
1184 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001185 if (state.ptr == state.start)
1186 state.start = (void*) ((char*) state.ptr + state.charsize);
1187 else
1188 state.start = state.ptr;
1189
1190 }
1191
1192 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001193 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001194 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001195 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001196 if (!item)
1197 goto error;
1198 status = PyList_Append(list, item);
1199 Py_DECREF(item);
1200 if (status < 0)
1201 goto error;
1202 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001203
1204 state_fini(&state);
1205
Guido van Rossum4e173842001-12-07 04:25:10 +00001206 Py_DECREF(filter);
1207
Fredrik Lundhdac58492001-10-21 21:48:30 +00001208 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001209 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001210 if (!joiner) {
1211 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001212 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001213 }
1214 if (PyList_GET_SIZE(list) == 0) {
1215 Py_DECREF(list);
1216 item = joiner;
1217 }
1218 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001219 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001220 item = _PyBytes_Join(joiner, list);
1221 else
1222 item = PyUnicode_Join(joiner, list);
1223 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001224 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001225 if (!item)
1226 return NULL;
1227 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001228
1229 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001230 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001231
1232 return item;
1233
1234error:
1235 Py_DECREF(list);
1236 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001237 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001238 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001239
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001240}
1241
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001242/*[clinic input]
1243_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001244
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001245 repl: object
1246 string: object
1247 count: Py_ssize_t = 0
1248
1249Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1250[clinic start generated code]*/
1251
1252static PyObject *
1253_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1254 PyObject *string, Py_ssize_t count)
1255/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1256{
1257 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001258}
1259
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001260/*[clinic input]
1261_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001262
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001263 repl: object
1264 string: object
1265 count: Py_ssize_t = 0
1266
1267Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1268[clinic start generated code]*/
1269
1270static PyObject *
1271_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1272 PyObject *string, Py_ssize_t count)
1273/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1274{
1275 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001276}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001277
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001278/*[clinic input]
1279_sre.SRE_Pattern.__copy__
1280
1281[clinic start generated code]*/
1282
1283static PyObject *
1284_sre_SRE_Pattern___copy___impl(PatternObject *self)
1285/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001286{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001287#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001288 PatternObject* copy;
1289 int offset;
1290
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001291 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1292 if (!copy)
1293 return NULL;
1294
1295 offset = offsetof(PatternObject, groups);
1296
1297 Py_XINCREF(self->groupindex);
1298 Py_XINCREF(self->indexgroup);
1299 Py_XINCREF(self->pattern);
1300
1301 memcpy((char*) copy + offset, (char*) self + offset,
1302 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001303 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001304
1305 return (PyObject*) copy;
1306#else
1307 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1308 return NULL;
1309#endif
1310}
1311
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001312/*[clinic input]
1313_sre.SRE_Pattern.__deepcopy__
1314
1315 memo: object
1316
1317[clinic start generated code]*/
1318
1319static PyObject *
1320_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1321/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001322{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001323#ifdef USE_BUILTIN_COPY
1324 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001325
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001326 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001327 if (!copy)
1328 return NULL;
1329
1330 if (!deepcopy(&copy->groupindex, memo) ||
1331 !deepcopy(&copy->indexgroup, memo) ||
1332 !deepcopy(&copy->pattern, memo)) {
1333 Py_DECREF(copy);
1334 return NULL;
1335 }
1336
1337#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001338 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1339 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001340#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001341}
1342
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001343static PyObject *
1344pattern_repr(PatternObject *obj)
1345{
1346 static const struct {
1347 const char *name;
1348 int value;
1349 } flag_names[] = {
1350 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1351 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1352 {"re.LOCALE", SRE_FLAG_LOCALE},
1353 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1354 {"re.DOTALL", SRE_FLAG_DOTALL},
1355 {"re.UNICODE", SRE_FLAG_UNICODE},
1356 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1357 {"re.DEBUG", SRE_FLAG_DEBUG},
1358 {"re.ASCII", SRE_FLAG_ASCII},
1359 };
1360 PyObject *result = NULL;
1361 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001362 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001363 int flags = obj->flags;
1364
1365 /* Omit re.UNICODE for valid string patterns. */
1366 if (obj->isbytes == 0 &&
1367 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1368 SRE_FLAG_UNICODE)
1369 flags &= ~SRE_FLAG_UNICODE;
1370
1371 flag_items = PyList_New(0);
1372 if (!flag_items)
1373 return NULL;
1374
1375 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1376 if (flags & flag_names[i].value) {
1377 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1378 if (!item)
1379 goto done;
1380
1381 if (PyList_Append(flag_items, item) < 0) {
1382 Py_DECREF(item);
1383 goto done;
1384 }
1385 Py_DECREF(item);
1386 flags &= ~flag_names[i].value;
1387 }
1388 }
1389 if (flags) {
1390 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1391 if (!item)
1392 goto done;
1393
1394 if (PyList_Append(flag_items, item) < 0) {
1395 Py_DECREF(item);
1396 goto done;
1397 }
1398 Py_DECREF(item);
1399 }
1400
1401 if (PyList_Size(flag_items) > 0) {
1402 PyObject *flags_result;
1403 PyObject *sep = PyUnicode_FromString("|");
1404 if (!sep)
1405 goto done;
1406 flags_result = PyUnicode_Join(sep, flag_items);
1407 Py_DECREF(sep);
1408 if (!flags_result)
1409 goto done;
1410 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1411 obj->pattern, flags_result);
1412 Py_DECREF(flags_result);
1413 }
1414 else {
1415 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1416 }
1417
1418done:
1419 Py_DECREF(flag_items);
1420 return result;
1421}
1422
Raymond Hettinger94478742004-09-24 04:31:19 +00001423PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1424
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001425/* PatternObject's 'groupindex' method. */
1426static PyObject *
1427pattern_groupindex(PatternObject *self)
1428{
1429 return PyDictProxy_New(self->groupindex);
1430}
1431
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001432static int _validate(PatternObject *self); /* Forward */
1433
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001434/*[clinic input]
1435_sre.compile
1436
1437 pattern: object
1438 flags: int
1439 code: object(subclass_of='&PyList_Type')
1440 groups: Py_ssize_t
1441 groupindex: object
1442 indexgroup: object
1443
1444[clinic start generated code]*/
1445
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001446static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001447_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001448 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1449 PyObject *indexgroup)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001450/*[clinic end generated code: output=ef9c2b3693776404 input=7d059ec8ae1edb85]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001451{
1452 /* "compile" pattern descriptor to pattern object */
1453
1454 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001455 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001456
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001457 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001458 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001459 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1460 if (!self)
1461 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001462 self->weakreflist = NULL;
1463 self->pattern = NULL;
1464 self->groupindex = NULL;
1465 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001466
1467 self->codesize = n;
1468
1469 for (i = 0; i < n; i++) {
1470 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001471 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001472 self->code[i] = (SRE_CODE) value;
1473 if ((unsigned long) self->code[i] != value) {
1474 PyErr_SetString(PyExc_OverflowError,
1475 "regular expression code size limit exceeded");
1476 break;
1477 }
1478 }
1479
1480 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001481 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001482 return NULL;
1483 }
1484
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001485 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001486 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001487 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001488 else {
1489 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001490 int charsize;
1491 Py_buffer view;
1492 view.buf = NULL;
1493 if (!getstring(pattern, &p_length, &self->isbytes,
1494 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001495 Py_DECREF(self);
1496 return NULL;
1497 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001498 if (view.buf)
1499 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001500 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001501
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001502 Py_INCREF(pattern);
1503 self->pattern = pattern;
1504
1505 self->flags = flags;
1506
1507 self->groups = groups;
1508
Victor Stinnerb44fb122016-11-21 16:35:08 +01001509 Py_INCREF(groupindex);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001510 self->groupindex = groupindex;
1511
Victor Stinnerb44fb122016-11-21 16:35:08 +01001512 Py_INCREF(indexgroup);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001513 self->indexgroup = indexgroup;
1514
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001515 if (!_validate(self)) {
1516 Py_DECREF(self);
1517 return NULL;
1518 }
1519
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001520 return (PyObject*) self;
1521}
1522
Guido van Rossumb700df92000-03-31 14:59:30 +00001523/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001524/* Code validation */
1525
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
1529
1530 The nice thing about the generated code is that it is position-independent:
1531 all jumps are relative jumps forward. Also, jumps don't cross each other:
1532 the target of a later jump is always earlier than the target of an earlier
1533 jump. IOW, this is okay:
1534
1535 J---------J-------T--------T
1536 \ \_____/ /
1537 \______________________/
1538
1539 but this is not:
1540
1541 J---------J-------T--------T
1542 \_________\_____/ /
1543 \____________/
1544
Serhiy Storchakaefa5a392013-10-27 08:04:58 +02001545 It also helps that SRE_CODE is always an unsigned type.
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001546*/
1547
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf() argument list, e.g. VTRACE(("x=%d\n", x)); without VVERBOSE it
   expands to an empty statement. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: trace the source line, then make the enclosing
   function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the function that uses them:
   'code' (current position), 'end' (one past the last usable word) and
   the destination 'op', 'arg' or 'skip'.  Each one FAILs when 'code' has
   already reached 'end'.

   GET_OP and GET_ARG read one word and advance 'code'.

   GET_SKIP_ADJ(adj) reads the word at 'code' into 'skip', FAILs if
   skip-adj is larger than the number of words left (end - code, counted
   from the skip word itself), and only then advances 'code' past the skip
   word.  GET_SKIP is the adj == 0 form. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-adj > (uintptr_t)(end - code))         \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001588
1589static int
1590_validate_charset(SRE_CODE *code, SRE_CODE *end)
1591{
1592 /* Some variables are manipulated by the macros above */
1593 SRE_CODE op;
1594 SRE_CODE arg;
1595 SRE_CODE offset;
1596 int i;
1597
1598 while (code < end) {
1599 GET_OP;
1600 switch (op) {
1601
1602 case SRE_OP_NEGATE:
1603 break;
1604
1605 case SRE_OP_LITERAL:
1606 GET_ARG;
1607 break;
1608
1609 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001610 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001611 GET_ARG;
1612 GET_ARG;
1613 break;
1614
1615 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001616 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001617 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001618 FAIL;
1619 code += offset;
1620 break;
1621
1622 case SRE_OP_BIGCHARSET:
1623 GET_ARG; /* Number of blocks */
1624 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001625 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001626 FAIL;
1627 /* Make sure that each byte points to a valid block */
1628 for (i = 0; i < 256; i++) {
1629 if (((unsigned char *)code)[i] >= arg)
1630 FAIL;
1631 }
1632 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001633 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001634 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001635 FAIL;
1636 code += offset;
1637 break;
1638
1639 case SRE_OP_CATEGORY:
1640 GET_ARG;
1641 switch (arg) {
1642 case SRE_CATEGORY_DIGIT:
1643 case SRE_CATEGORY_NOT_DIGIT:
1644 case SRE_CATEGORY_SPACE:
1645 case SRE_CATEGORY_NOT_SPACE:
1646 case SRE_CATEGORY_WORD:
1647 case SRE_CATEGORY_NOT_WORD:
1648 case SRE_CATEGORY_LINEBREAK:
1649 case SRE_CATEGORY_NOT_LINEBREAK:
1650 case SRE_CATEGORY_LOC_WORD:
1651 case SRE_CATEGORY_LOC_NOT_WORD:
1652 case SRE_CATEGORY_UNI_DIGIT:
1653 case SRE_CATEGORY_UNI_NOT_DIGIT:
1654 case SRE_CATEGORY_UNI_SPACE:
1655 case SRE_CATEGORY_UNI_NOT_SPACE:
1656 case SRE_CATEGORY_UNI_WORD:
1657 case SRE_CATEGORY_UNI_NOT_WORD:
1658 case SRE_CATEGORY_UNI_LINEBREAK:
1659 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1660 break;
1661 default:
1662 FAIL;
1663 }
1664 break;
1665
1666 default:
1667 FAIL;
1668
1669 }
1670 }
1671
1672 return 1;
1673}
1674
/* Validate the words [code, end) as a sequence of complete operations,
   recursing into the bodies of compound operators (BRANCH, REPEAT,
   REPEAT_ONE, GROUPREF_EXISTS, ASSERT, ...).

   code    first word to check
   end     one past the last word to check
   groups  group count; bounds the arguments of MARK and GROUPREF*

   Returns 1 if the sequence is well-formed, 0 otherwise (through the FAIL
   macro).  No Python exception is set here. */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
    /* Some variables are manipulated by the macros above */
    SRE_CODE op;
    SRE_CODE arg;
    SRE_CODE skip;

    VTRACE(("code=%p, end=%p\n", code, end));

    /* Recursive calls below pass an end computed as code+skip-k; a skip
       that is too small makes that end lie before code. */
    if (code > end)
        FAIL;

    while (code < end) {
        GET_OP;
        switch (op) {

        case SRE_OP_MARK:
            /* We don't check whether marks are properly nested; the
               sre_match() code is robust even if they don't, and the worst
               you can get is nonsensical match results. */
            GET_ARG;
            if (arg > 2 * (size_t)groups + 1) {
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
                FAIL;
            }
            break;

        case SRE_OP_LITERAL:
        case SRE_OP_NOT_LITERAL:
        case SRE_OP_LITERAL_IGNORE:
        case SRE_OP_NOT_LITERAL_IGNORE:
            GET_ARG;
            /* The arg is just a character, nothing to check */
            break;

        case SRE_OP_SUCCESS:
        case SRE_OP_FAILURE:
            /* Nothing to check; these normally end the matching process */
            break;

        case SRE_OP_AT:
            GET_ARG;
            switch (arg) {
            case SRE_AT_BEGINNING:
            case SRE_AT_BEGINNING_STRING:
            case SRE_AT_BEGINNING_LINE:
            case SRE_AT_END:
            case SRE_AT_END_LINE:
            case SRE_AT_END_STRING:
            case SRE_AT_BOUNDARY:
            case SRE_AT_NON_BOUNDARY:
            case SRE_AT_LOC_BOUNDARY:
            case SRE_AT_LOC_NON_BOUNDARY:
            case SRE_AT_UNI_BOUNDARY:
            case SRE_AT_UNI_NON_BOUNDARY:
                break;
            default:
                FAIL;
            }
            break;

        case SRE_OP_ANY:
        case SRE_OP_ANY_ALL:
            /* These have no operands */
            break;

        case SRE_OP_IN:
        case SRE_OP_IN_IGNORE:
            GET_SKIP;
            /* Stop 1 before the end; we check the FAILURE below */
            if (!_validate_charset(code, code+skip-2))
                FAIL;
            if (code[skip-2] != SRE_OP_FAILURE)
                FAIL;
            code += skip-1;
            break;

        case SRE_OP_INFO:
            {
                /* A minimal info field is
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
                   more follows. */
                SRE_CODE flags, i;
                SRE_CODE *newcode;
                GET_SKIP;
                /* newcode: first word after the whole INFO block */
                newcode = code+skip-1;
                GET_ARG; flags = arg;
                GET_ARG;  /* min: read, not checked */
                GET_ARG;  /* max: read, not checked */
                /* Check that only valid flags are present */
                if ((flags & ~(SRE_INFO_PREFIX |
                               SRE_INFO_LITERAL |
                               SRE_INFO_CHARSET)) != 0)
                    FAIL;
                /* PREFIX and CHARSET are mutually exclusive */
                if ((flags & SRE_INFO_PREFIX) &&
                    (flags & SRE_INFO_CHARSET))
                    FAIL;
                /* LITERAL implies PREFIX */
                if ((flags & SRE_INFO_LITERAL) &&
                    !(flags & SRE_INFO_PREFIX))
                    FAIL;
                /* Validate the prefix */
                if (flags & SRE_INFO_PREFIX) {
                    SRE_CODE prefix_len;
                    GET_ARG; prefix_len = arg;
                    GET_ARG;  /* second prefix word: read, not checked */
                    /* Here comes the prefix string */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    code += prefix_len;
                    /* And here comes the overlap table */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    /* Each overlap value should be < prefix_len */
                    for (i = 0; i < prefix_len; i++) {
                        if (code[i] >= prefix_len)
                            FAIL;
                    }
                    code += prefix_len;
                }
                /* Validate the charset */
                if (flags & SRE_INFO_CHARSET) {
                    if (!_validate_charset(code, newcode-1))
                        FAIL;
                    if (newcode[-1] != SRE_OP_FAILURE)
                        FAIL;
                    code = newcode;
                }
                else if (code != newcode) {
                    /* without a charset the block must end exactly here */
                    VTRACE(("code=%p, newcode=%p\n", code, newcode));
                    FAIL;
                }
            }
            break;

        case SRE_OP_BRANCH:
            {
                /* common jump target of all alternatives, set by the
                   first one */
                SRE_CODE *target = NULL;
                for (;;) {
                    GET_SKIP;
                    /* a zero skip ends the list of alternatives */
                    if (skip == 0)
                        break;
                    /* Stop 2 before the end; we check the JUMP below */
                    if (!_validate_inner(code, code+skip-3, groups))
                        FAIL;
                    code += skip-3;
                    /* Check that it ends with a JUMP, and that each JUMP
                       has the same target */
                    GET_OP;
                    if (op != SRE_OP_JUMP)
                        FAIL;
                    GET_SKIP;
                    if (target == NULL)
                        target = code+skip-1;
                    else if (code+skip-1 != target)
                        FAIL;
                }
            }
            break;

        case SRE_OP_REPEAT_ONE:
        case SRE_OP_MIN_REPEAT_ONE:
            {
                SRE_CODE min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                /* Stop 1 before the end; the SUCCESS is checked below */
                if (!_validate_inner(code, code+skip-4, groups))
                    FAIL;
                code += skip-4;
                GET_OP;
                if (op != SRE_OP_SUCCESS)
                    FAIL;
            }
            break;

        case SRE_OP_REPEAT:
            {
                SRE_CODE min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                if (!_validate_inner(code, code+skip-3, groups))
                    FAIL;
                code += skip-3;
                /* the repeated body must be followed by an UNTIL op */
                GET_OP;
                if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
                    FAIL;
            }
            break;

        case SRE_OP_GROUPREF:
        case SRE_OP_GROUPREF_IGNORE:
            GET_ARG;
            if (arg >= (size_t)groups)
                FAIL;
            break;

        case SRE_OP_GROUPREF_EXISTS:
            /* The regex syntax for this is: '(?(group)then|else)', where
               'group' is either an integer group number or a group name,
               'then' and 'else' are sub-regexes, and 'else' is optional. */
            GET_ARG;
            if (arg >= (size_t)groups)
                FAIL;
            GET_SKIP_ADJ(1);
            code--; /* The skip is relative to the first arg! */
            /* There are two possibilities here: if there is both a 'then'
               part and an 'else' part, the generated code looks like:

               GROUPREF_EXISTS
               <group>
               <skipyes>
               ...then part...
               JUMP
               <skipno>
               (<skipyes> jumps here)
               ...else part...
               (<skipno> jumps here)

               If there is only a 'then' part, it looks like:

               GROUPREF_EXISTS
               <group>
               <skip>
               ...then part...
               (<skip> jumps here)

               There is no direct way to decide which it is, and we don't want
               to allow arbitrary jumps anywhere in the code; so we just look
               for a JUMP opcode preceding our skip target.
            */
            if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
                code[skip-3] == SRE_OP_JUMP)
            {
                VTRACE(("both then and else parts present\n"));
                if (!_validate_inner(code+1, code+skip-3, groups))
                    FAIL;
                code += skip-2; /* Position after JUMP, at <skipno> */
                GET_SKIP;
                if (!_validate_inner(code, code+skip-1, groups))
                    FAIL;
                code += skip-1;
            }
            else {
                VTRACE(("only a then part present\n"));
                if (!_validate_inner(code+1, code+skip-1, groups))
                    FAIL;
                code += skip-1;
            }
            break;

        case SRE_OP_ASSERT:
        case SRE_OP_ASSERT_NOT:
            GET_SKIP;
            GET_ARG; /* 0 for lookahead, width for lookbehind */
            code--; /* Back up over arg to simplify math below */
            if (arg & 0x80000000)
                FAIL; /* Width too large */
            /* Stop 1 before the end; we check the SUCCESS below */
            if (!_validate_inner(code+1, code+skip-2, groups))
                FAIL;
            code += skip-2;
            GET_OP;
            if (op != SRE_OP_SUCCESS)
                FAIL;
            break;

        default:
            /* unknown opcode */
            FAIL;

        }
    }

    VTRACE(("okay\n"));
    return 1;
}
1963
1964static int
1965_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1966{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001967 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1968 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001969 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001970 return _validate_inner(code, end-1, groups);
1971}
1972
1973static int
1974_validate(PatternObject *self)
1975{
1976 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1977 {
1978 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1979 return 0;
1980 }
1981 else
1982 VTRACE(("Success!\n"));
1983 return 1;
1984}
1985
1986/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001987/* match methods */
1988
/* Deallocator for match objects: drops the references held in the regs,
   string and pattern fields, then frees the object itself. */
static void
match_dealloc(MatchObject* self)
{
    /* regs and string are released with the NULL-tolerant variant;
       pattern is released with Py_DECREF, i.e. it is expected to be set */
    Py_XDECREF(self->regs);
    Py_XDECREF(self->string);
    Py_DECREF(self->pattern);
    PyObject_DEL(self);
}
1997
1998static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001999match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002000{
Serhiy Storchaka25324972013-10-16 12:46:28 +03002001 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002002 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03002003 Py_buffer view;
2004 PyObject *result;
2005 void* ptr;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02002006 Py_ssize_t i, j;
Serhiy Storchaka25324972013-10-16 12:46:28 +03002007
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002008 if (index < 0 || index >= self->groups) {
2009 /* raise IndexError if we were given a bad group number */
2010 PyErr_SetString(
2011 PyExc_IndexError,
2012 "no such group"
2013 );
2014 return NULL;
2015 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002016
Fredrik Lundh6f013982000-07-03 18:44:21 +00002017 index *= 2;
2018
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002019 if (self->string == Py_None || self->mark[index] < 0) {
2020 /* return default value if the string or group is undefined */
2021 Py_INCREF(def);
2022 return def;
2023 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002024
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002025 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03002026 if (ptr == NULL)
2027 return NULL;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02002028
2029 i = self->mark[index];
2030 j = self->mark[index+1];
2031 i = Py_MIN(i, length);
2032 j = Py_MIN(j, length);
2033 result = getslice(isbytes, ptr, self->string, i, j);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002034 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002035 PyBuffer_Release(&view);
2036 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002037}
2038
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002039static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002040match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002041{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002042 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002043
Guido van Rossumddefaf32007-01-14 03:31:43 +00002044 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002045 /* Default value */
2046 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002047
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03002048 if (PyIndex_Check(index)) {
2049 return PyNumber_AsSsize_t(index, NULL);
2050 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002051
Fredrik Lundh6f013982000-07-03 18:44:21 +00002052 i = -1;
2053
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002054 if (self->pattern->groupindex) {
2055 index = PyObject_GetItem(self->pattern->groupindex, index);
2056 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002057 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002058 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002059 Py_DECREF(index);
2060 } else
2061 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002062 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002063
2064 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002065}
2066
2067static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002068match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002069{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002070 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002071}
2072
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002073/*[clinic input]
2074_sre.SRE_Match.expand
2075
2076 template: object
2077
2078Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2079[clinic start generated code]*/
2080
2081static PyObject *
2082_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2083/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002084{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002085 /* delegate to Python code */
2086 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002087 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002088 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002089 );
2090}
2091
2092static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002093match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002094{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002095 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002096 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002097
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002098 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002099
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002100 switch (size) {
2101 case 0:
2102 result = match_getslice(self, Py_False, Py_None);
2103 break;
2104 case 1:
2105 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2106 break;
2107 default:
2108 /* fetch multiple items */
2109 result = PyTuple_New(size);
2110 if (!result)
2111 return NULL;
2112 for (i = 0; i < size; i++) {
2113 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002114 self, PyTuple_GET_ITEM(args, i), Py_None
2115 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002116 if (!item) {
2117 Py_DECREF(result);
2118 return NULL;
2119 }
2120 PyTuple_SET_ITEM(result, i, item);
2121 }
2122 break;
2123 }
2124 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002125}
2126
Eric V. Smith605bdae2016-09-11 08:55:43 -04002127static PyObject*
2128match_getitem(MatchObject* self, PyObject* name)
2129{
2130 return match_getslice(self, name, Py_None);
2131}
2132
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002133/*[clinic input]
2134_sre.SRE_Match.groups
2135
2136 default: object = None
2137 Is used for groups that did not participate in the match.
2138
2139Return a tuple containing all the subgroups of the match, from 1.
2140[clinic start generated code]*/
2141
2142static PyObject *
2143_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2144/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002145{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002146 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002147 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002148
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002149 result = PyTuple_New(self->groups-1);
2150 if (!result)
2151 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002152
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002153 for (index = 1; index < self->groups; index++) {
2154 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002155 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002156 if (!item) {
2157 Py_DECREF(result);
2158 return NULL;
2159 }
2160 PyTuple_SET_ITEM(result, index-1, item);
2161 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002162
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002163 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002164}
2165
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002166/*[clinic input]
2167_sre.SRE_Match.groupdict
2168
2169 default: object = None
2170 Is used for groups that did not participate in the match.
2171
2172Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2173[clinic start generated code]*/
2174
2175static PyObject *
2176_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2177/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002178{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002179 PyObject* result;
2180 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002181 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002182
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002183 result = PyDict_New();
2184 if (!result || !self->pattern->groupindex)
2185 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002186
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002187 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002188 if (!keys)
2189 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002190
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002191 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002192 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002193 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002194 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002195 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002196 if (!key)
2197 goto failed;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002198 value = match_getslice(self, key, default_value);
Benjamin Peterson3a27b082016-08-15 22:01:41 -07002199 if (!value)
Fredrik Lundh770617b2001-01-14 15:06:11 +00002200 goto failed;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002201 status = PyDict_SetItem(result, key, value);
2202 Py_DECREF(value);
2203 if (status < 0)
2204 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002205 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002206
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002208
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002209 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002210
2211failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002212 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002213 Py_DECREF(result);
2214 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002215}
2216
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002217/*[clinic input]
2218_sre.SRE_Match.start -> Py_ssize_t
2219
2220 group: object(c_default="NULL") = 0
2221 /
2222
2223Return index of the start of the substring matched by group.
2224[clinic start generated code]*/
2225
2226static Py_ssize_t
2227_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2228/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002229{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002230 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002231
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002232 if (index < 0 || index >= self->groups) {
2233 PyErr_SetString(
2234 PyExc_IndexError,
2235 "no such group"
2236 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002237 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002238 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002239
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002240 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002241 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002242}
2243
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002244/*[clinic input]
2245_sre.SRE_Match.end -> Py_ssize_t
2246
2247 group: object(c_default="NULL") = 0
2248 /
2249
2250Return index of the end of the substring matched by group.
2251[clinic start generated code]*/
2252
2253static Py_ssize_t
2254_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2255/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002256{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002257 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002258
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002259 if (index < 0 || index >= self->groups) {
2260 PyErr_SetString(
2261 PyExc_IndexError,
2262 "no such group"
2263 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002264 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002265 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002266
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002267 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002268 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002269}
2270
2271LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002272_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002273{
2274 PyObject* pair;
2275 PyObject* item;
2276
2277 pair = PyTuple_New(2);
2278 if (!pair)
2279 return NULL;
2280
Christian Heimes217cfd12007-12-02 14:31:20 +00002281 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002282 if (!item)
2283 goto error;
2284 PyTuple_SET_ITEM(pair, 0, item);
2285
Christian Heimes217cfd12007-12-02 14:31:20 +00002286 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002287 if (!item)
2288 goto error;
2289 PyTuple_SET_ITEM(pair, 1, item);
2290
2291 return pair;
2292
2293 error:
2294 Py_DECREF(pair);
2295 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002296}
2297
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002298/*[clinic input]
2299_sre.SRE_Match.span
2300
2301 group: object(c_default="NULL") = 0
2302 /
2303
2304For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2305[clinic start generated code]*/
2306
2307static PyObject *
2308_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2309/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002310{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002311 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002312
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002313 if (index < 0 || index >= self->groups) {
2314 PyErr_SetString(
2315 PyExc_IndexError,
2316 "no such group"
2317 );
2318 return NULL;
2319 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002320
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002321 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002322 return _pair(self->mark[index*2], self->mark[index*2+1]);
2323}
2324
2325static PyObject*
2326match_regs(MatchObject* self)
2327{
2328 PyObject* regs;
2329 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002330 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002331
2332 regs = PyTuple_New(self->groups);
2333 if (!regs)
2334 return NULL;
2335
2336 for (index = 0; index < self->groups; index++) {
2337 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2338 if (!item) {
2339 Py_DECREF(regs);
2340 return NULL;
2341 }
2342 PyTuple_SET_ITEM(regs, index, item);
2343 }
2344
2345 Py_INCREF(regs);
2346 self->regs = regs;
2347
2348 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002349}
2350
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002351/*[clinic input]
2352_sre.SRE_Match.__copy__
2353
2354[clinic start generated code]*/
2355
2356static PyObject *
2357_sre_SRE_Match___copy___impl(MatchObject *self)
2358/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002359{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002360#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002361 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002362 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002363
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002364 slots = 2 * (self->pattern->groups+1);
2365
2366 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2367 if (!copy)
2368 return NULL;
2369
2370 /* this value a constant, but any compiler should be able to
2371 figure that out all by itself */
2372 offset = offsetof(MatchObject, string);
2373
2374 Py_XINCREF(self->pattern);
2375 Py_XINCREF(self->string);
2376 Py_XINCREF(self->regs);
2377
2378 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002379 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002380
2381 return (PyObject*) copy;
2382#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002383 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002384 return NULL;
2385#endif
2386}
2387
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002388/*[clinic input]
2389_sre.SRE_Match.__deepcopy__
2390
2391 memo: object
2392
2393[clinic start generated code]*/
2394
2395static PyObject *
2396_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2397/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002398{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002399#ifdef USE_BUILTIN_COPY
2400 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002401
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002402 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002403 if (!copy)
2404 return NULL;
2405
2406 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2407 !deepcopy(&copy->string, memo) ||
2408 !deepcopy(&copy->regs, memo)) {
2409 Py_DECREF(copy);
2410 return NULL;
2411 }
2412
2413#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002414 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2415 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002416#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002417}
2418
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002419PyDoc_STRVAR(match_doc,
2420"The result of re.match() and re.search().\n\
2421Match objects always have a boolean value of True.");
2422
2423PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002424"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002425 Return subgroup(s) of the match by indices or names.\n\
2426 For 0 returns the entire match.");
2427
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002428static PyObject *
2429match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002430{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002431 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002432 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002433 Py_INCREF(Py_None);
2434 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002435}
2436
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002437static PyObject *
2438match_lastgroup_get(MatchObject *self)
2439{
2440 if (self->pattern->indexgroup && self->lastindex >= 0) {
2441 PyObject* result = PySequence_GetItem(
2442 self->pattern->indexgroup, self->lastindex
2443 );
2444 if (result)
2445 return result;
2446 PyErr_Clear();
2447 }
2448 Py_INCREF(Py_None);
2449 return Py_None;
2450}
2451
2452static PyObject *
2453match_regs_get(MatchObject *self)
2454{
2455 if (self->regs) {
2456 Py_INCREF(self->regs);
2457 return self->regs;
2458 } else
2459 return match_regs(self);
2460}
2461
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002462static PyObject *
2463match_repr(MatchObject *self)
2464{
2465 PyObject *result;
2466 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2467 if (group0 == NULL)
2468 return NULL;
2469 result = PyUnicode_FromFormat(
2470 "<%s object; span=(%d, %d), match=%.50R>",
2471 Py_TYPE(self)->tp_name,
2472 self->mark[0], self->mark[1], group0);
2473 Py_DECREF(group0);
2474 return result;
2475}
2476
2477
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002478static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002479pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002480{
2481 /* create match object (from state object) */
2482
2483 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002484 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002485 char* base;
2486 int n;
2487
2488 if (status > 0) {
2489
2490 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002491 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002492 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2493 2*(pattern->groups+1));
2494 if (!match)
2495 return NULL;
2496
2497 Py_INCREF(pattern);
2498 match->pattern = pattern;
2499
2500 Py_INCREF(state->string);
2501 match->string = state->string;
2502
2503 match->regs = NULL;
2504 match->groups = pattern->groups+1;
2505
2506 /* fill in group slices */
2507
2508 base = (char*) state->beginning;
2509 n = state->charsize;
2510
2511 match->mark[0] = ((char*) state->start - base) / n;
2512 match->mark[1] = ((char*) state->ptr - base) / n;
2513
2514 for (i = j = 0; i < pattern->groups; i++, j+=2)
2515 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2516 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2517 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2518 } else
2519 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2520
2521 match->pos = state->pos;
2522 match->endpos = state->endpos;
2523
2524 match->lastindex = state->lastindex;
2525
2526 return (PyObject*) match;
2527
2528 } else if (status == 0) {
2529
2530 /* no match */
2531 Py_INCREF(Py_None);
2532 return Py_None;
2533
2534 }
2535
2536 /* internal error */
2537 pattern_error(status);
2538 return NULL;
2539}
2540
2541
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002542/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002543/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002544
2545static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002546scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002547{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002548 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002549 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002550 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002551}
2552
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002553/*[clinic input]
2554_sre.SRE_Scanner.match
2555
2556[clinic start generated code]*/
2557
2558static PyObject *
2559_sre_SRE_Scanner_match_impl(ScannerObject *self)
2560/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002561{
2562 SRE_STATE* state = &self->state;
2563 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002564 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002565
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002566 if (state->start == NULL)
2567 Py_RETURN_NONE;
2568
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002569 state_reset(state);
2570
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002571 state->ptr = state->start;
2572
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002573 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002574 if (PyErr_Occurred())
2575 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002576
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002577 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002578 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002579
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002580 if (status == 0)
2581 state->start = NULL;
2582 else if (state->ptr != state->start)
2583 state->start = state->ptr;
2584 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002585 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002586 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002587 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002588
2589 return match;
2590}
2591
2592
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002593/*[clinic input]
2594_sre.SRE_Scanner.search
2595
2596[clinic start generated code]*/
2597
2598static PyObject *
2599_sre_SRE_Scanner_search_impl(ScannerObject *self)
2600/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002601{
2602 SRE_STATE* state = &self->state;
2603 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002604 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002605
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002606 if (state->start == NULL)
2607 Py_RETURN_NONE;
2608
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002609 state_reset(state);
2610
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002611 state->ptr = state->start;
2612
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002613 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002614 if (PyErr_Occurred())
2615 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002616
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002617 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002618 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002619
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002620 if (status == 0)
2621 state->start = NULL;
2622 else if (state->ptr != state->start)
2623 state->start = state->ptr;
2624 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002625 state->start = (void*) ((char*) state->ptr + state->charsize);
2626 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002627 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002628
2629 return match;
2630}
2631
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002632static PyObject *
2633pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002634{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002635 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002636
2637 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002638 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2639 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002640 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002641 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002642
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002643 /* create search state object */
2644 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2645 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002646 return NULL;
2647 }
2648
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002649 Py_INCREF(self);
2650 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002651
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002652 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002653}
2654
Victor Stinnerb44fb122016-11-21 16:35:08 +01002655static Py_hash_t
2656pattern_hash(PatternObject *self)
2657{
2658 Py_hash_t hash, hash2;
2659
2660 hash = PyObject_Hash(self->pattern);
2661 if (hash == -1) {
2662 return -1;
2663 }
2664
2665 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2666 hash ^= hash2;
2667
2668 hash ^= self->flags;
2669 hash ^= self->isbytes;
2670 hash ^= self->codesize;
2671
2672 if (hash == -1) {
2673 hash = -2;
2674 }
2675 return hash;
2676}
2677
2678static PyObject*
2679pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2680{
2681 PatternObject *left, *right;
2682 int cmp;
2683
2684 if (op != Py_EQ && op != Py_NE) {
2685 Py_RETURN_NOTIMPLEMENTED;
2686 }
2687
2688 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2689 Py_RETURN_NOTIMPLEMENTED;
2690 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002691
2692 if (lefto == righto) {
2693 /* a pattern is equal to itself */
2694 return PyBool_FromLong(op == Py_EQ);
2695 }
2696
Victor Stinnerb44fb122016-11-21 16:35:08 +01002697 left = (PatternObject *)lefto;
2698 right = (PatternObject *)righto;
2699
2700 cmp = (left->flags == right->flags
2701 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002702 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002703 if (cmp) {
2704 /* Compare the code and the pattern because the same pattern can
2705 produce different codes depending on the locale used to compile the
2706 pattern when the re.LOCALE flag is used. Don't compare groups,
2707 indexgroup nor groupindex: they are derivated from the pattern. */
2708 cmp = (memcmp(left->code, right->code,
2709 sizeof(left->code[0]) * left->codesize) == 0);
2710 }
2711 if (cmp) {
2712 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2713 Py_EQ);
2714 if (cmp < 0) {
2715 return NULL;
2716 }
2717 }
2718 if (op == Py_NE) {
2719 cmp = !cmp;
2720 }
2721 return PyBool_FromLong(cmp);
2722}
2723
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002724#include "clinic/_sre.c.h"
2725
2726static PyMethodDef pattern_methods[] = {
2727 _SRE_SRE_PATTERN_MATCH_METHODDEF
2728 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2729 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2730 _SRE_SRE_PATTERN_SUB_METHODDEF
2731 _SRE_SRE_PATTERN_SUBN_METHODDEF
2732 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2733 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2734 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2735 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2736 _SRE_SRE_PATTERN___COPY___METHODDEF
2737 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2738 {NULL, NULL}
2739};
2740
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002741static PyGetSetDef pattern_getset[] = {
2742 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2743 "A dictionary mapping group names to group numbers."},
2744 {NULL} /* Sentinel */
2745};
2746
2747#define PAT_OFF(x) offsetof(PatternObject, x)
2748static PyMemberDef pattern_members[] = {
2749 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2750 {"flags", T_INT, PAT_OFF(flags), READONLY},
2751 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2752 {NULL} /* Sentinel */
2753};
2754
2755static PyTypeObject Pattern_Type = {
2756 PyVarObject_HEAD_INIT(NULL, 0)
2757 "_" SRE_MODULE ".SRE_Pattern",
2758 sizeof(PatternObject), sizeof(SRE_CODE),
2759 (destructor)pattern_dealloc, /* tp_dealloc */
2760 0, /* tp_print */
2761 0, /* tp_getattr */
2762 0, /* tp_setattr */
2763 0, /* tp_reserved */
2764 (reprfunc)pattern_repr, /* tp_repr */
2765 0, /* tp_as_number */
2766 0, /* tp_as_sequence */
2767 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002768 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002769 0, /* tp_call */
2770 0, /* tp_str */
2771 0, /* tp_getattro */
2772 0, /* tp_setattro */
2773 0, /* tp_as_buffer */
2774 Py_TPFLAGS_DEFAULT, /* tp_flags */
2775 pattern_doc, /* tp_doc */
2776 0, /* tp_traverse */
2777 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002778 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002779 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2780 0, /* tp_iter */
2781 0, /* tp_iternext */
2782 pattern_methods, /* tp_methods */
2783 pattern_members, /* tp_members */
2784 pattern_getset, /* tp_getset */
2785};
2786
Eric V. Smith605bdae2016-09-11 08:55:43 -04002787/* Match objects do not support length or assignment, but do support
2788 __getitem__. */
2789static PyMappingMethods match_as_mapping = {
2790 NULL,
2791 (binaryfunc)match_getitem,
2792 NULL
2793};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002794
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002795static PyMethodDef match_methods[] = {
2796 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2797 _SRE_SRE_MATCH_START_METHODDEF
2798 _SRE_SRE_MATCH_END_METHODDEF
2799 _SRE_SRE_MATCH_SPAN_METHODDEF
2800 _SRE_SRE_MATCH_GROUPS_METHODDEF
2801 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2802 _SRE_SRE_MATCH_EXPAND_METHODDEF
2803 _SRE_SRE_MATCH___COPY___METHODDEF
2804 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2805 {NULL, NULL}
2806};
2807
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002808static PyGetSetDef match_getset[] = {
2809 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2810 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2811 {"regs", (getter)match_regs_get, (setter)NULL},
2812 {NULL}
2813};
2814
2815#define MATCH_OFF(x) offsetof(MatchObject, x)
2816static PyMemberDef match_members[] = {
2817 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2818 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2819 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2820 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2821 {NULL}
2822};
2823
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2826
2827static PyTypeObject Match_Type = {
2828 PyVarObject_HEAD_INIT(NULL,0)
2829 "_" SRE_MODULE ".SRE_Match",
2830 sizeof(MatchObject), sizeof(Py_ssize_t),
2831 (destructor)match_dealloc, /* tp_dealloc */
2832 0, /* tp_print */
2833 0, /* tp_getattr */
2834 0, /* tp_setattr */
2835 0, /* tp_reserved */
2836 (reprfunc)match_repr, /* tp_repr */
2837 0, /* tp_as_number */
2838 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002839 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002840 0, /* tp_hash */
2841 0, /* tp_call */
2842 0, /* tp_str */
2843 0, /* tp_getattro */
2844 0, /* tp_setattro */
2845 0, /* tp_as_buffer */
2846 Py_TPFLAGS_DEFAULT, /* tp_flags */
2847 match_doc, /* tp_doc */
2848 0, /* tp_traverse */
2849 0, /* tp_clear */
2850 0, /* tp_richcompare */
2851 0, /* tp_weaklistoffset */
2852 0, /* tp_iter */
2853 0, /* tp_iternext */
2854 match_methods, /* tp_methods */
2855 match_members, /* tp_members */
2856 match_getset, /* tp_getset */
2857};
2858
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002859static PyMethodDef scanner_methods[] = {
2860 _SRE_SRE_SCANNER_MATCH_METHODDEF
2861 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2862 {NULL, NULL}
2863};
2864
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002865#define SCAN_OFF(x) offsetof(ScannerObject, x)
2866static PyMemberDef scanner_members[] = {
2867 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2868 {NULL} /* Sentinel */
2869};
2870
2871static PyTypeObject Scanner_Type = {
2872 PyVarObject_HEAD_INIT(NULL, 0)
2873 "_" SRE_MODULE ".SRE_Scanner",
2874 sizeof(ScannerObject), 0,
2875 (destructor)scanner_dealloc,/* tp_dealloc */
2876 0, /* tp_print */
2877 0, /* tp_getattr */
2878 0, /* tp_setattr */
2879 0, /* tp_reserved */
2880 0, /* tp_repr */
2881 0, /* tp_as_number */
2882 0, /* tp_as_sequence */
2883 0, /* tp_as_mapping */
2884 0, /* tp_hash */
2885 0, /* tp_call */
2886 0, /* tp_str */
2887 0, /* tp_getattro */
2888 0, /* tp_setattro */
2889 0, /* tp_as_buffer */
2890 Py_TPFLAGS_DEFAULT, /* tp_flags */
2891 0, /* tp_doc */
2892 0, /* tp_traverse */
2893 0, /* tp_clear */
2894 0, /* tp_richcompare */
2895 0, /* tp_weaklistoffset */
2896 0, /* tp_iter */
2897 0, /* tp_iternext */
2898 scanner_methods, /* tp_methods */
2899 scanner_members, /* tp_members */
2900 0, /* tp_getset */
2901};
2902
Guido van Rossumb700df92000-03-31 14:59:30 +00002903static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002904 _SRE_COMPILE_METHODDEF
2905 _SRE_GETCODESIZE_METHODDEF
2906 _SRE_GETLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002907 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002908};
2909
Martin v. Löwis1a214512008-06-11 05:26:20 +00002910static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002911 PyModuleDef_HEAD_INIT,
2912 "_" SRE_MODULE,
2913 NULL,
2914 -1,
2915 _functions,
2916 NULL,
2917 NULL,
2918 NULL,
2919 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002920};
2921
2922PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002923{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002924 PyObject* m;
2925 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002926 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002927
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002928 /* Patch object types */
2929 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2930 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002931 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002932
Martin v. Löwis1a214512008-06-11 05:26:20 +00002933 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002934 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002935 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002936 d = PyModule_GetDict(m);
2937
Christian Heimes217cfd12007-12-02 14:31:20 +00002938 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002939 if (x) {
2940 PyDict_SetItemString(d, "MAGIC", x);
2941 Py_DECREF(x);
2942 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002943
Christian Heimes217cfd12007-12-02 14:31:20 +00002944 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002945 if (x) {
2946 PyDict_SetItemString(d, "CODESIZE", x);
2947 Py_DECREF(x);
2948 }
2949
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002950 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2951 if (x) {
2952 PyDict_SetItemString(d, "MAXREPEAT", x);
2953 Py_DECREF(x);
2954 }
2955
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002956 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2957 if (x) {
2958 PyDict_SetItemString(d, "MAXGROUPS", x);
2959 Py_DECREF(x);
2960 }
2961
Neal Norwitzfe537132007-08-26 03:55:15 +00002962 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002963 if (x) {
2964 PyDict_SetItemString(d, "copyright", x);
2965 Py_DECREF(x);
2966 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002967 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002968}
2969
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002970/* vim:ts=4:sw=4:et
2971*/