blob: 6e149011017ea36c8d122480d8ad0b784d7fc36b [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020038static const char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000039 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000065/* enables copy/deepcopy handling (work in progress) */
66#undef USE_BUILTIN_COPY
67
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000068/* -------------------------------------------------------------------- */
69
/* LOCAL(type) declares a file-local helper returning `type`, using the
   cheapest calling convention the compiler offers. */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#ifdef USE_INLINE
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
#endif
80
/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL         -1  /* illegal opcode */
#define SRE_ERROR_STATE           -2  /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3  /* runaway recursion */
#define SRE_ERROR_MEMORY          -9  /* out of memory */
#define SRE_ERROR_INTERRUPTED     -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000087
/* TRACE((fmt, ...)) forwards its parenthesised argument list to printf when
   VERBOSE is defined, and expands to nothing otherwise. */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
93
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000094/* -------------------------------------------------------------------- */
95/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000096
/* ASCII-only character predicates: code points of 128 and above never match */
#define SRE_IS_DIGIT(ch) ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) < 128 && Py_ISSPACE(ch))
/* only '\n' counts as a line break in this (non-unicode) mode */
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_ALNUM(ch) ((ch) < 128 && Py_ISALNUM(ch))
/* word characters are alphanumerics plus the underscore */
#define SRE_IS_WORD(ch) ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +0000107
/* Lowercase an ASCII code point; values of 128 and above are returned
   unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
112
/* Uppercase an ASCII code point; values of 128 and above are returned
   unchanged. */
static unsigned int sre_upper(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOUPPER(ch);
}
117
/* locale-specific character predicates */
/* For any unsigned c, !(c & ~N) is the same test as (c < N+1); the masked
 * form avoids warnings when c's type only supports numbers < N+1. */
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) ((ch) == '_' || SRE_LOC_IS_ALNUM((ch)))
123
/* Lowercase via the C library's tolower() for code points below 256;
   anything larger is returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256)
        return (unsigned int)tolower(ch);
    return ch;
}
128
/* Uppercase via the C library's toupper() for code points below 256;
   anything larger is returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch < 256)
        return (unsigned int)toupper(ch);
    return ch;
}
133
/* unicode-specific character predicates, delegating to the Py_UNICODE_*
   classification macros */

#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
/* word characters are alphanumerics plus the underscore */
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000141
/* Lowercase a code point using Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
146
/* Uppercase a code point using Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
151
Guido van Rossumb700df92000-03-31 14:59:30 +0000152LOCAL(int)
153sre_category(SRE_CODE category, unsigned int ch)
154{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000155 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000157 case SRE_CATEGORY_DIGIT:
158 return SRE_IS_DIGIT(ch);
159 case SRE_CATEGORY_NOT_DIGIT:
160 return !SRE_IS_DIGIT(ch);
161 case SRE_CATEGORY_SPACE:
162 return SRE_IS_SPACE(ch);
163 case SRE_CATEGORY_NOT_SPACE:
164 return !SRE_IS_SPACE(ch);
165 case SRE_CATEGORY_WORD:
166 return SRE_IS_WORD(ch);
167 case SRE_CATEGORY_NOT_WORD:
168 return !SRE_IS_WORD(ch);
169 case SRE_CATEGORY_LINEBREAK:
170 return SRE_IS_LINEBREAK(ch);
171 case SRE_CATEGORY_NOT_LINEBREAK:
172 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000174 case SRE_CATEGORY_LOC_WORD:
175 return SRE_LOC_IS_WORD(ch);
176 case SRE_CATEGORY_LOC_NOT_WORD:
177 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000178
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000179 case SRE_CATEGORY_UNI_DIGIT:
180 return SRE_UNI_IS_DIGIT(ch);
181 case SRE_CATEGORY_UNI_NOT_DIGIT:
182 return !SRE_UNI_IS_DIGIT(ch);
183 case SRE_CATEGORY_UNI_SPACE:
184 return SRE_UNI_IS_SPACE(ch);
185 case SRE_CATEGORY_UNI_NOT_SPACE:
186 return !SRE_UNI_IS_SPACE(ch);
187 case SRE_CATEGORY_UNI_WORD:
188 return SRE_UNI_IS_WORD(ch);
189 case SRE_CATEGORY_UNI_NOT_WORD:
190 return !SRE_UNI_IS_WORD(ch);
191 case SRE_CATEGORY_UNI_LINEBREAK:
192 return SRE_UNI_IS_LINEBREAK(ch);
193 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
194 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 }
196 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000197}
198
199/* helpers */
200
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000201static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000202data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000203{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000204 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000205 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000206 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000207 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000208 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000209}
210
211static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000212data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000213{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000214 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000215 minsize = state->data_stack_base+size;
216 cursize = state->data_stack_size;
217 if (cursize < minsize) {
218 void* stack;
219 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300220 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000222 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000223 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000224 return SRE_ERROR_MEMORY;
225 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000226 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000227 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000228 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000229 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000230}
231
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000232/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000233
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300234#define SRE_CHAR Py_UCS1
235#define SIZEOF_SRE_CHAR 1
236#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300237#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000238
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300239/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000240
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300241#define SRE_CHAR Py_UCS2
242#define SIZEOF_SRE_CHAR 2
243#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300244#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000245
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300246/* generate 32-bit unicode version */
247
248#define SRE_CHAR Py_UCS4
249#define SIZEOF_SRE_CHAR 4
250#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300251#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000252
253/* -------------------------------------------------------------------- */
254/* factories and destructors */
255
256/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100257static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300258static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000259
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300260
261/*[clinic input]
262module _sre
263class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
264class _sre.SRE_Match "MatchObject *" "&Match_Type"
265class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
266[clinic start generated code]*/
267/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
268
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700269static PyTypeObject Pattern_Type;
270static PyTypeObject Match_Type;
271static PyTypeObject Scanner_Type;
272
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300273/*[clinic input]
274_sre.getcodesize -> int
275[clinic start generated code]*/
276
277static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300278_sre_getcodesize_impl(PyObject *module)
279/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000280{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300281 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000282}
283
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300284/*[clinic input]
285_sre.getlower -> int
286
287 character: int
288 flags: int
289 /
290
291[clinic start generated code]*/
292
293static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300294_sre_getlower_impl(PyObject *module, int character, int flags)
295/*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000296{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000297 if (flags & SRE_FLAG_LOCALE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300298 return sre_lower_locale(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000299 if (flags & SRE_FLAG_UNICODE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300300 return sre_lower_unicode(character);
301 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000302}
303
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000304LOCAL(void)
305state_reset(SRE_STATE* state)
306{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000308 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000309
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000310 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000311 state->lastindex = -1;
312
313 state->repeat = NULL;
314
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000315 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000316}
317
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000318static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300320 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600321 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000322{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000323 /* given a python object, return a data pointer, a length (in
324 characters), and a character size. return NULL if the object
325 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000326
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000327 /* Unicode objects do not support the buffer API. So, get the data
328 directly instead. */
329 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200330 if (PyUnicode_READY(string) == -1)
331 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200332 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200333 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300334 *p_isbytes = 0;
335 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000336 }
337
Victor Stinner0058b862011-09-29 03:27:47 +0200338 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300339 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200340 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300341 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000342 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 *p_length = view->len;
345 *p_charsize = 1;
346 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000347
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300348 if (view->buf == NULL) {
349 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
350 PyBuffer_Release(view);
351 view->buf = NULL;
352 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300354 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000355}
356
357LOCAL(PyObject*)
358state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000359 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000360{
361 /* prepare state object */
362
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000363 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300364 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000365 void* ptr;
366
367 memset(state, 0, sizeof(SRE_STATE));
368
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300369 state->mark = PyMem_New(void *, pattern->groups * 2);
370 if (!state->mark) {
371 PyErr_NoMemory();
372 goto err;
373 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000374 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000375 state->lastindex = -1;
376
Benjamin Petersone48944b2012-03-07 14:50:25 -0600377 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300378 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000379 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600380 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000381
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600383 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200384 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600385 goto err;
386 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300387 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600388 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200389 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600390 goto err;
391 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 /* adjust boundaries */
394 if (start < 0)
395 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000396 else if (start > length)
397 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 if (end < 0)
400 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000401 else if (end > length)
402 end = length;
403
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300404 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000405 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 state->start = (void*) ((char*) ptr + start * state->charsize);
410 state->end = (void*) ((char*) ptr + end * state->charsize);
411
412 Py_INCREF(string);
413 state->string = string;
414 state->pos = start;
415 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000416
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200417 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000418 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200419 state->upper = sre_upper_locale;
420 }
421 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000422 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200423 state->upper = sre_upper_unicode;
424 }
425 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000426 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200427 state->upper = sre_upper;
428 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600431 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300432 PyMem_Del(state->mark);
433 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600434 if (state->buffer.buf)
435 PyBuffer_Release(&state->buffer);
436 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000437}
438
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000439LOCAL(void)
440state_fini(SRE_STATE* state)
441{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600442 if (state->buffer.buf)
443 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000445 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300446 PyMem_Del(state->mark);
447 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000448}
449
/* convert a pointer into the subject into a character offset from the
   start of the string (byte distance divided by the character size) */
#define STATE_OFFSET(state, member) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
453
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000454LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300455getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300456 PyObject* string, Py_ssize_t start, Py_ssize_t end)
457{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300458 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300459 if (PyBytes_CheckExact(string) &&
460 start == 0 && end == PyBytes_GET_SIZE(string)) {
461 Py_INCREF(string);
462 return string;
463 }
464 return PyBytes_FromStringAndSize(
465 (const char *)ptr + start, end - start);
466 }
467 else {
468 return PyUnicode_Substring(string, start, end);
469 }
470}
471
472LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000473state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000474{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000475 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000476
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000477 index = (index - 1) * 2;
478
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000479 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000480 if (empty)
481 /* want empty string */
482 i = j = 0;
483 else {
484 Py_INCREF(Py_None);
485 return Py_None;
486 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000487 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000488 i = STATE_OFFSET(state, state->mark[index]);
489 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000490 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000491
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300492 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000493}
494
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000495static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100496pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000497{
498 switch (status) {
499 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400500 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000501 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400502 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000503 "maximum recursion limit exceeded"
504 );
505 break;
506 case SRE_ERROR_MEMORY:
507 PyErr_NoMemory();
508 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000509 case SRE_ERROR_INTERRUPTED:
510 /* An exception has already been raised, so let it fly */
511 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000512 default:
513 /* other error codes indicate compiler/engine bugs */
514 PyErr_SetString(
515 PyExc_RuntimeError,
516 "internal error in regular expression engine"
517 );
518 }
519}
520
Guido van Rossumb700df92000-03-31 14:59:30 +0000521static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000522pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000523{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000524 if (self->weakreflist != NULL)
525 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000526 Py_XDECREF(self->pattern);
527 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000528 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000529 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000530}
531
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300532LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300533sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300534{
535 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300536 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300537 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300538 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300539 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300540 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300541}
542
543LOCAL(Py_ssize_t)
544sre_search(SRE_STATE* state, SRE_CODE* pattern)
545{
546 if (state->charsize == 1)
547 return sre_ucs1_search(state, pattern);
548 if (state->charsize == 2)
549 return sre_ucs2_search(state, pattern);
550 assert(state->charsize == 4);
551 return sre_ucs4_search(state, pattern);
552}
553
Larry Hastings16c51912014-01-07 11:53:01 -0800554static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200555fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
556{
557 if (string2 != NULL) {
558 if (string != NULL) {
559 PyErr_Format(PyExc_TypeError,
560 "Argument given by name ('%s') and position (1)",
561 oldname);
562 return NULL;
563 }
564 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
565 "The '%s' keyword parameter name is deprecated. "
566 "Use 'string' instead.", oldname) < 0)
567 return NULL;
568 return string2;
569 }
570 if (string == NULL) {
571 PyErr_SetString(PyExc_TypeError,
572 "Required argument 'string' (pos 1) not found");
573 return NULL;
574 }
575 return string;
576}
Larry Hastings16c51912014-01-07 11:53:01 -0800577
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300578/*[clinic input]
579_sre.SRE_Pattern.match
580
581 string: object = NULL
582 pos: Py_ssize_t = 0
583 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
584 *
585 pattern: object = NULL
586
587Matches zero or more characters at the beginning of the string.
588[clinic start generated code]*/
589
Larry Hastings16c51912014-01-07 11:53:01 -0800590static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300591_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
592 Py_ssize_t pos, Py_ssize_t endpos,
593 PyObject *pattern)
594/*[clinic end generated code: output=74b4b1da3bb2d84e input=3d079aa99979b81d]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800595{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000596 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100597 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300598 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000599
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200600 string = fix_string_param(string, pattern, "pattern");
601 if (!string)
602 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300603 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000604 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000606 state.ptr = state.start;
607
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000608 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
609
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300610 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000611
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000612 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300613 if (PyErr_Occurred()) {
614 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000615 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300616 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000617
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300618 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000619 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300620 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000621}
622
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300623/*[clinic input]
624_sre.SRE_Pattern.fullmatch
625
626 string: object = NULL
627 pos: Py_ssize_t = 0
628 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
629 *
630 pattern: object = NULL
631
632Matches against all of the string
633[clinic start generated code]*/
634
635static PyObject *
636_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
637 Py_ssize_t pos, Py_ssize_t endpos,
638 PyObject *pattern)
639/*[clinic end generated code: output=1c98bc5da744ea94 input=d4228606cc12580f]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200640{
641 SRE_STATE state;
642 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300643 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200644
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300645 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200646 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200647 return NULL;
648
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300649 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200650 return NULL;
651
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200652 state.ptr = state.start;
653
654 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
655
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300656 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200657
658 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300659 if (PyErr_Occurred()) {
660 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200661 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300662 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200663
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300664 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200665 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300666 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200667}
668
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300669/*[clinic input]
670_sre.SRE_Pattern.search
671
672 string: object = NULL
673 pos: Py_ssize_t = 0
674 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
675 *
676 pattern: object = NULL
677
678Scan through string looking for a match, and return a corresponding match object instance.
679
680Return None if no position in the string matches.
681[clinic start generated code]*/
682
683static PyObject *
684_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
685 Py_ssize_t pos, Py_ssize_t endpos,
686 PyObject *pattern)
687/*[clinic end generated code: output=3839394a18e5ea4f input=dab42720f4be3a4b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000688{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000689 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100690 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300691 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000692
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300693 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200694 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000696
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300697 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000698 return NULL;
699
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000700 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
701
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300702 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000703
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000704 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
705
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300706 if (PyErr_Occurred()) {
707 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000708 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300709 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000710
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300711 match = pattern_new_match(self, &state, status);
712 state_fini(&state);
713 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000714}
715
716static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200717call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000718{
719 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000720 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721 PyObject* func;
722 PyObject* result;
723
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000724 if (!args)
725 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000726 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000727 if (!name)
728 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000729 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000730 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000731 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000732 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000733 func = PyObject_GetAttrString(mod, function);
734 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000735 if (!func)
736 return NULL;
737 result = PyObject_CallObject(func, args);
738 Py_DECREF(func);
739 Py_DECREF(args);
740 return result;
741}
742
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
   Returns 1 on success and 0 on failure (in which case *object is left
   untouched). */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* duplicate = call("copy", "deepcopy",
                               PyTuple_Pack(2, *object, memo));

    if (duplicate == NULL)
        return 0;

    Py_SETREF(*object, duplicate);
    return 1; /* success */
}
#endif
761
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300762/*[clinic input]
763_sre.SRE_Pattern.findall
764
765 string: object = NULL
766 pos: Py_ssize_t = 0
767 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
768 *
769 source: object = NULL
770
771Return a list of all non-overlapping matches of pattern in string.
772[clinic start generated code]*/
773
774static PyObject *
775_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
776 Py_ssize_t pos, Py_ssize_t endpos,
777 PyObject *source)
778/*[clinic end generated code: output=51295498b300639d input=df688355c056b9de]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000779{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000780 SRE_STATE state;
781 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100782 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000783 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000784
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300785 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200786 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000787 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000788
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300789 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000790 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000791
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000792 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000793 if (!list) {
794 state_fini(&state);
795 return NULL;
796 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000797
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000798 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000799
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000800 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000801
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000802 state_reset(&state);
803
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000804 state.ptr = state.start;
805
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300806 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300807 if (PyErr_Occurred())
808 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000809
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000810 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000811 if (status == 0)
812 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000813 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000814 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000815 }
Tim Peters3d563502006-01-21 02:47:53 +0000816
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000817 /* don't bother to build a match object */
818 switch (self->groups) {
819 case 0:
820 b = STATE_OFFSET(&state, state.start);
821 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300822 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300823 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000824 if (!item)
825 goto error;
826 break;
827 case 1:
828 item = state_getslice(&state, 1, string, 1);
829 if (!item)
830 goto error;
831 break;
832 default:
833 item = PyTuple_New(self->groups);
834 if (!item)
835 goto error;
836 for (i = 0; i < self->groups; i++) {
837 PyObject* o = state_getslice(&state, i+1, string, 1);
838 if (!o) {
839 Py_DECREF(item);
840 goto error;
841 }
842 PyTuple_SET_ITEM(item, i, o);
843 }
844 break;
845 }
846
847 status = PyList_Append(list, item);
848 Py_DECREF(item);
849 if (status < 0)
850 goto error;
851
852 if (state.ptr == state.start)
853 state.start = (void*) ((char*) state.ptr + state.charsize);
854 else
855 state.start = state.ptr;
856
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000857 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000858
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000859 state_fini(&state);
860 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000861
862error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000863 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000864 state_fini(&state);
865 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000866
Guido van Rossumb700df92000-03-31 14:59:30 +0000867}
868
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300869/*[clinic input]
870_sre.SRE_Pattern.finditer
871
872 string: object
873 pos: Py_ssize_t = 0
874 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
875
876Return an iterator over all non-overlapping matches for the RE pattern in string.
877
878For each match, the iterator returns a match object.
879[clinic start generated code]*/
880
881static PyObject *
882_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
883 Py_ssize_t pos, Py_ssize_t endpos)
884/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000885{
886 PyObject* scanner;
887 PyObject* search;
888 PyObject* iterator;
889
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300890 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000891 if (!scanner)
892 return NULL;
893
894 search = PyObject_GetAttrString(scanner, "search");
895 Py_DECREF(scanner);
896 if (!search)
897 return NULL;
898
899 iterator = PyCallIter_New(search, Py_None);
900 Py_DECREF(search);
901
902 return iterator;
903}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000904
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300905/*[clinic input]
906_sre.SRE_Pattern.scanner
907
908 string: object
909 pos: Py_ssize_t = 0
910 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
911
912[clinic start generated code]*/
913
914static PyObject *
915_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
916 Py_ssize_t pos, Py_ssize_t endpos)
917/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
918{
919 return pattern_scanner(self, string, pos, endpos);
920}
921
922/*[clinic input]
923_sre.SRE_Pattern.split
924
925 string: object = NULL
926 maxsplit: Py_ssize_t = 0
927 *
928 source: object = NULL
929
930Split string by the occurrences of pattern.
931[clinic start generated code]*/
932
933static PyObject *
934_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
935 Py_ssize_t maxsplit, PyObject *source)
936/*[clinic end generated code: output=20bac2ff55b9f84c input=41e0b2e35e599d7b]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000937{
938 SRE_STATE state;
939 PyObject* list;
940 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100941 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000942 Py_ssize_t n;
943 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000944 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000945
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300946 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200947 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000948 return NULL;
949
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200950 assert(self->codesize != 0);
951 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
952 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
953 PyErr_SetString(PyExc_ValueError,
954 "split() requires a non-empty pattern match.");
955 return NULL;
956 }
957 if (PyErr_WarnEx(PyExc_FutureWarning,
958 "split() requires a non-empty pattern match.",
959 1) < 0)
960 return NULL;
961 }
962
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300963 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000964 return NULL;
965
966 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000967 if (!list) {
968 state_fini(&state);
969 return NULL;
970 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000971
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000972 n = 0;
973 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000974
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000975 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000976
977 state_reset(&state);
978
979 state.ptr = state.start;
980
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300981 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300982 if (PyErr_Occurred())
983 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000984
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000985 if (status <= 0) {
986 if (status == 0)
987 break;
988 pattern_error(status);
989 goto error;
990 }
Tim Peters3d563502006-01-21 02:47:53 +0000991
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000992 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300993 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000994 break;
995 /* skip one character */
996 state.start = (void*) ((char*) state.ptr + state.charsize);
997 continue;
998 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000999
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001000 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001001 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001002 string, STATE_OFFSET(&state, last),
1003 STATE_OFFSET(&state, state.start)
1004 );
1005 if (!item)
1006 goto error;
1007 status = PyList_Append(list, item);
1008 Py_DECREF(item);
1009 if (status < 0)
1010 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001011
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001012 /* add groups (if any) */
1013 for (i = 0; i < self->groups; i++) {
1014 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001015 if (!item)
1016 goto error;
1017 status = PyList_Append(list, item);
1018 Py_DECREF(item);
1019 if (status < 0)
1020 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001021 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001022
1023 n = n + 1;
1024
1025 last = state.start = state.ptr;
1026
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001027 }
1028
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001029 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001030 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001031 string, STATE_OFFSET(&state, last), state.endpos
1032 );
1033 if (!item)
1034 goto error;
1035 status = PyList_Append(list, item);
1036 Py_DECREF(item);
1037 if (status < 0)
1038 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001039
1040 state_fini(&state);
1041 return list;
1042
1043error:
1044 Py_DECREF(list);
1045 state_fini(&state);
1046 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001047
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001048}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001049
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001050static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001051pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001052 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001053{
1054 SRE_STATE state;
1055 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001056 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001057 PyObject* item;
1058 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001059 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001060 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001061 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001062 Py_ssize_t n;
1063 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001064 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001065 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001066 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001067
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001068 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001069 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001070 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001071 Py_INCREF(filter);
1072 filter_is_callable = 1;
1073 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001074 /* if not callable, check if it's a literal string */
1075 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001076 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001077 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001078 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001079 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001080 if (charsize == 1)
1081 literal = memchr(ptr, '\\', n) == NULL;
1082 else
1083 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001084 } else {
1085 PyErr_Clear();
1086 literal = 0;
1087 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001088 if (view.buf)
1089 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001090 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001091 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001092 Py_INCREF(filter);
1093 filter_is_callable = 0;
1094 } else {
1095 /* not a literal; hand it over to the template compiler */
1096 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001097 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001098 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001099 );
1100 if (!filter)
1101 return NULL;
1102 filter_is_callable = PyCallable_Check(filter);
1103 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001104 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001105
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001106 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001107 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001108 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001109 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001110
1111 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001112 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001113 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001114 state_fini(&state);
1115 return NULL;
1116 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001117
1118 n = i = 0;
1119
1120 while (!count || n < count) {
1121
1122 state_reset(&state);
1123
1124 state.ptr = state.start;
1125
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001126 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001127 if (PyErr_Occurred())
1128 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001129
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001130 if (status <= 0) {
1131 if (status == 0)
1132 break;
1133 pattern_error(status);
1134 goto error;
1135 }
Tim Peters3d563502006-01-21 02:47:53 +00001136
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001137 b = STATE_OFFSET(&state, state.start);
1138 e = STATE_OFFSET(&state, state.ptr);
1139
1140 if (i < b) {
1141 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001142 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001143 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001144 if (!item)
1145 goto error;
1146 status = PyList_Append(list, item);
1147 Py_DECREF(item);
1148 if (status < 0)
1149 goto error;
1150
1151 } else if (i == b && i == e && n > 0)
1152 /* ignore empty match on latest position */
1153 goto next;
1154
1155 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001156 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001157 match = pattern_new_match(self, &state, 1);
1158 if (!match)
1159 goto error;
Victor Stinner559bb6a2016-08-22 22:48:54 +02001160 item = _PyObject_CallArg1(filter, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001161 Py_DECREF(match);
1162 if (!item)
1163 goto error;
1164 } else {
1165 /* filter is literal string */
1166 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001167 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001168 }
1169
1170 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001171 if (item != Py_None) {
1172 status = PyList_Append(list, item);
1173 Py_DECREF(item);
1174 if (status < 0)
1175 goto error;
1176 }
Tim Peters3d563502006-01-21 02:47:53 +00001177
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001178 i = e;
1179 n = n + 1;
1180
1181next:
1182 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001183 if (state.ptr == state.end)
1184 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001185 if (state.ptr == state.start)
1186 state.start = (void*) ((char*) state.ptr + state.charsize);
1187 else
1188 state.start = state.ptr;
1189
1190 }
1191
1192 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001193 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001194 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001195 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001196 if (!item)
1197 goto error;
1198 status = PyList_Append(list, item);
1199 Py_DECREF(item);
1200 if (status < 0)
1201 goto error;
1202 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001203
1204 state_fini(&state);
1205
Guido van Rossum4e173842001-12-07 04:25:10 +00001206 Py_DECREF(filter);
1207
Fredrik Lundhdac58492001-10-21 21:48:30 +00001208 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001209 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001210 if (!joiner) {
1211 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001212 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001213 }
1214 if (PyList_GET_SIZE(list) == 0) {
1215 Py_DECREF(list);
1216 item = joiner;
1217 }
1218 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001219 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001220 item = _PyBytes_Join(joiner, list);
1221 else
1222 item = PyUnicode_Join(joiner, list);
1223 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001224 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001225 if (!item)
1226 return NULL;
1227 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001228
1229 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001230 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001231
1232 return item;
1233
1234error:
1235 Py_DECREF(list);
1236 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001237 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001238 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001239
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001240}
1241
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001242/*[clinic input]
1243_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001244
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001245 repl: object
1246 string: object
1247 count: Py_ssize_t = 0
1248
1249Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1250[clinic start generated code]*/
1251
1252static PyObject *
1253_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1254 PyObject *string, Py_ssize_t count)
1255/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1256{
1257 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001258}
1259
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001260/*[clinic input]
1261_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001262
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001263 repl: object
1264 string: object
1265 count: Py_ssize_t = 0
1266
1267Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1268[clinic start generated code]*/
1269
1270static PyObject *
1271_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1272 PyObject *string, Py_ssize_t count)
1273/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1274{
1275 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001276}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001277
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001278/*[clinic input]
1279_sre.SRE_Pattern.__copy__
1280
1281[clinic start generated code]*/
1282
1283static PyObject *
1284_sre_SRE_Pattern___copy___impl(PatternObject *self)
1285/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001286{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001287#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001288 PatternObject* copy;
1289 int offset;
1290
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001291 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1292 if (!copy)
1293 return NULL;
1294
1295 offset = offsetof(PatternObject, groups);
1296
1297 Py_XINCREF(self->groupindex);
1298 Py_XINCREF(self->indexgroup);
1299 Py_XINCREF(self->pattern);
1300
1301 memcpy((char*) copy + offset, (char*) self + offset,
1302 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001303 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001304
1305 return (PyObject*) copy;
1306#else
1307 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1308 return NULL;
1309#endif
1310}
1311
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001312/*[clinic input]
1313_sre.SRE_Pattern.__deepcopy__
1314
1315 memo: object
1316
1317[clinic start generated code]*/
1318
1319static PyObject *
1320_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1321/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001322{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001323#ifdef USE_BUILTIN_COPY
1324 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001325
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001326 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001327 if (!copy)
1328 return NULL;
1329
1330 if (!deepcopy(&copy->groupindex, memo) ||
1331 !deepcopy(&copy->indexgroup, memo) ||
1332 !deepcopy(&copy->pattern, memo)) {
1333 Py_DECREF(copy);
1334 return NULL;
1335 }
1336
1337#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001338 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1339 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001340#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001341}
1342
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001343static PyObject *
1344pattern_repr(PatternObject *obj)
1345{
1346 static const struct {
1347 const char *name;
1348 int value;
1349 } flag_names[] = {
1350 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1351 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1352 {"re.LOCALE", SRE_FLAG_LOCALE},
1353 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1354 {"re.DOTALL", SRE_FLAG_DOTALL},
1355 {"re.UNICODE", SRE_FLAG_UNICODE},
1356 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1357 {"re.DEBUG", SRE_FLAG_DEBUG},
1358 {"re.ASCII", SRE_FLAG_ASCII},
1359 };
1360 PyObject *result = NULL;
1361 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001362 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001363 int flags = obj->flags;
1364
1365 /* Omit re.UNICODE for valid string patterns. */
1366 if (obj->isbytes == 0 &&
1367 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1368 SRE_FLAG_UNICODE)
1369 flags &= ~SRE_FLAG_UNICODE;
1370
1371 flag_items = PyList_New(0);
1372 if (!flag_items)
1373 return NULL;
1374
1375 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1376 if (flags & flag_names[i].value) {
1377 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1378 if (!item)
1379 goto done;
1380
1381 if (PyList_Append(flag_items, item) < 0) {
1382 Py_DECREF(item);
1383 goto done;
1384 }
1385 Py_DECREF(item);
1386 flags &= ~flag_names[i].value;
1387 }
1388 }
1389 if (flags) {
1390 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1391 if (!item)
1392 goto done;
1393
1394 if (PyList_Append(flag_items, item) < 0) {
1395 Py_DECREF(item);
1396 goto done;
1397 }
1398 Py_DECREF(item);
1399 }
1400
1401 if (PyList_Size(flag_items) > 0) {
1402 PyObject *flags_result;
1403 PyObject *sep = PyUnicode_FromString("|");
1404 if (!sep)
1405 goto done;
1406 flags_result = PyUnicode_Join(sep, flag_items);
1407 Py_DECREF(sep);
1408 if (!flags_result)
1409 goto done;
1410 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1411 obj->pattern, flags_result);
1412 Py_DECREF(flags_result);
1413 }
1414 else {
1415 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1416 }
1417
1418done:
1419 Py_DECREF(flag_items);
1420 return result;
1421}
1422
Raymond Hettinger94478742004-09-24 04:31:19 +00001423PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1424
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001425/* PatternObject's 'groupindex' method. */
1426static PyObject *
1427pattern_groupindex(PatternObject *self)
1428{
1429 return PyDictProxy_New(self->groupindex);
1430}
1431
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001432static int _validate(PatternObject *self); /* Forward */
1433
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001434/*[clinic input]
1435_sre.compile
1436
1437 pattern: object
1438 flags: int
1439 code: object(subclass_of='&PyList_Type')
1440 groups: Py_ssize_t
Victor Stinner726a57d2016-11-22 23:04:39 +01001441 groupindex: object(subclass_of='&PyDict_Type')
1442 indexgroup: object(subclass_of='&PyTuple_Type')
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001443
1444[clinic start generated code]*/
1445
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001446static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001447_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001448 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1449 PyObject *indexgroup)
Victor Stinner726a57d2016-11-22 23:04:39 +01001450/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001451{
1452 /* "compile" pattern descriptor to pattern object */
1453
1454 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001455 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001456
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001457 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001458 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001459 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1460 if (!self)
1461 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001462 self->weakreflist = NULL;
1463 self->pattern = NULL;
1464 self->groupindex = NULL;
1465 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001466
1467 self->codesize = n;
1468
1469 for (i = 0; i < n; i++) {
1470 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001471 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001472 self->code[i] = (SRE_CODE) value;
1473 if ((unsigned long) self->code[i] != value) {
1474 PyErr_SetString(PyExc_OverflowError,
1475 "regular expression code size limit exceeded");
1476 break;
1477 }
1478 }
1479
1480 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001481 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001482 return NULL;
1483 }
1484
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001485 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001486 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001487 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001488 else {
1489 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001490 int charsize;
1491 Py_buffer view;
1492 view.buf = NULL;
1493 if (!getstring(pattern, &p_length, &self->isbytes,
1494 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001495 Py_DECREF(self);
1496 return NULL;
1497 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001498 if (view.buf)
1499 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001500 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001501
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001502 Py_INCREF(pattern);
1503 self->pattern = pattern;
1504
1505 self->flags = flags;
1506
1507 self->groups = groups;
1508
Victor Stinnerb44fb122016-11-21 16:35:08 +01001509 Py_INCREF(groupindex);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001510 self->groupindex = groupindex;
1511
Victor Stinnerb44fb122016-11-21 16:35:08 +01001512 Py_INCREF(indexgroup);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001513 self->indexgroup = indexgroup;
1514
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001515 if (!_validate(self)) {
1516 Py_DECREF(self);
1517 return NULL;
1518 }
1519
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001520 return (PyObject*) self;
1521}
1522
Guido van Rossumb700df92000-03-31 14:59:30 +00001523/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001524/* Code validation */
1525
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
1529
1530 The nice thing about the generated code is that it is position-independent:
1531 all jumps are relative jumps forward. Also, jumps don't cross each other:
1532 the target of a later jump is always earlier than the target of an earlier
1533 jump. IOW, this is okay:
1534
1535 J---------J-------T--------T
1536 \ \_____/ /
1537 \______________________/
1538
1539 but this is not:
1540
1541 J---------J-------T--------T
1542 \_________\_____/ /
1543 \____________/
1544
Serhiy Storchakaefa5a392013-10-27 08:04:58 +02001545 It also helps that SRE_CODE is always an unsigned type.
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001546*/
1547
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf() argument list: VTRACE(("fmt", args...)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the *enclosing function* return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros expect the locals 'code' and 'end' (current position and
   one-past-the-end of the region being validated) to be in scope, plus
   'op', 'arg' or 'skip' respectively to receive the word that was read.
   Each one FAILs instead of reading at or beyond 'end'. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and check that skip-adj words, counted from the skip
   word itself, still fit before 'end'.  'adj' is parenthesized so that an
   expression argument is subtracted as a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, (void *)(code+skip))); \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001588
1589static int
1590_validate_charset(SRE_CODE *code, SRE_CODE *end)
1591{
1592 /* Some variables are manipulated by the macros above */
1593 SRE_CODE op;
1594 SRE_CODE arg;
1595 SRE_CODE offset;
1596 int i;
1597
1598 while (code < end) {
1599 GET_OP;
1600 switch (op) {
1601
1602 case SRE_OP_NEGATE:
1603 break;
1604
1605 case SRE_OP_LITERAL:
1606 GET_ARG;
1607 break;
1608
1609 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001610 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001611 GET_ARG;
1612 GET_ARG;
1613 break;
1614
1615 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001616 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001617 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001618 FAIL;
1619 code += offset;
1620 break;
1621
1622 case SRE_OP_BIGCHARSET:
1623 GET_ARG; /* Number of blocks */
1624 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001625 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001626 FAIL;
1627 /* Make sure that each byte points to a valid block */
1628 for (i = 0; i < 256; i++) {
1629 if (((unsigned char *)code)[i] >= arg)
1630 FAIL;
1631 }
1632 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001633 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001634 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001635 FAIL;
1636 code += offset;
1637 break;
1638
1639 case SRE_OP_CATEGORY:
1640 GET_ARG;
1641 switch (arg) {
1642 case SRE_CATEGORY_DIGIT:
1643 case SRE_CATEGORY_NOT_DIGIT:
1644 case SRE_CATEGORY_SPACE:
1645 case SRE_CATEGORY_NOT_SPACE:
1646 case SRE_CATEGORY_WORD:
1647 case SRE_CATEGORY_NOT_WORD:
1648 case SRE_CATEGORY_LINEBREAK:
1649 case SRE_CATEGORY_NOT_LINEBREAK:
1650 case SRE_CATEGORY_LOC_WORD:
1651 case SRE_CATEGORY_LOC_NOT_WORD:
1652 case SRE_CATEGORY_UNI_DIGIT:
1653 case SRE_CATEGORY_UNI_NOT_DIGIT:
1654 case SRE_CATEGORY_UNI_SPACE:
1655 case SRE_CATEGORY_UNI_NOT_SPACE:
1656 case SRE_CATEGORY_UNI_WORD:
1657 case SRE_CATEGORY_UNI_NOT_WORD:
1658 case SRE_CATEGORY_UNI_LINEBREAK:
1659 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1660 break;
1661 default:
1662 FAIL;
1663 }
1664 break;
1665
1666 default:
1667 FAIL;
1668
1669 }
1670 }
1671
1672 return 1;
1673}
1674
1675static int
1676_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1677{
1678 /* Some variables are manipulated by the macros above */
1679 SRE_CODE op;
1680 SRE_CODE arg;
1681 SRE_CODE skip;
1682
1683 VTRACE(("code=%p, end=%p\n", code, end));
1684
1685 if (code > end)
1686 FAIL;
1687
1688 while (code < end) {
1689 GET_OP;
1690 switch (op) {
1691
1692 case SRE_OP_MARK:
1693 /* We don't check whether marks are properly nested; the
1694 sre_match() code is robust even if they don't, and the worst
1695 you can get is nonsensical match results. */
1696 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001697 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001698 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1699 FAIL;
1700 }
1701 break;
1702
1703 case SRE_OP_LITERAL:
1704 case SRE_OP_NOT_LITERAL:
1705 case SRE_OP_LITERAL_IGNORE:
1706 case SRE_OP_NOT_LITERAL_IGNORE:
1707 GET_ARG;
1708 /* The arg is just a character, nothing to check */
1709 break;
1710
1711 case SRE_OP_SUCCESS:
1712 case SRE_OP_FAILURE:
1713 /* Nothing to check; these normally end the matching process */
1714 break;
1715
1716 case SRE_OP_AT:
1717 GET_ARG;
1718 switch (arg) {
1719 case SRE_AT_BEGINNING:
1720 case SRE_AT_BEGINNING_STRING:
1721 case SRE_AT_BEGINNING_LINE:
1722 case SRE_AT_END:
1723 case SRE_AT_END_LINE:
1724 case SRE_AT_END_STRING:
1725 case SRE_AT_BOUNDARY:
1726 case SRE_AT_NON_BOUNDARY:
1727 case SRE_AT_LOC_BOUNDARY:
1728 case SRE_AT_LOC_NON_BOUNDARY:
1729 case SRE_AT_UNI_BOUNDARY:
1730 case SRE_AT_UNI_NON_BOUNDARY:
1731 break;
1732 default:
1733 FAIL;
1734 }
1735 break;
1736
1737 case SRE_OP_ANY:
1738 case SRE_OP_ANY_ALL:
1739 /* These have no operands */
1740 break;
1741
1742 case SRE_OP_IN:
1743 case SRE_OP_IN_IGNORE:
1744 GET_SKIP;
1745 /* Stop 1 before the end; we check the FAILURE below */
1746 if (!_validate_charset(code, code+skip-2))
1747 FAIL;
1748 if (code[skip-2] != SRE_OP_FAILURE)
1749 FAIL;
1750 code += skip-1;
1751 break;
1752
1753 case SRE_OP_INFO:
1754 {
1755 /* A minimal info field is
1756 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1757 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1758 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001759 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001760 SRE_CODE *newcode;
1761 GET_SKIP;
1762 newcode = code+skip-1;
1763 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001764 GET_ARG;
1765 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001766 /* Check that only valid flags are present */
1767 if ((flags & ~(SRE_INFO_PREFIX |
1768 SRE_INFO_LITERAL |
1769 SRE_INFO_CHARSET)) != 0)
1770 FAIL;
1771 /* PREFIX and CHARSET are mutually exclusive */
1772 if ((flags & SRE_INFO_PREFIX) &&
1773 (flags & SRE_INFO_CHARSET))
1774 FAIL;
1775 /* LITERAL implies PREFIX */
1776 if ((flags & SRE_INFO_LITERAL) &&
1777 !(flags & SRE_INFO_PREFIX))
1778 FAIL;
1779 /* Validate the prefix */
1780 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001781 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001782 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001783 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001784 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001785 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001786 FAIL;
1787 code += prefix_len;
1788 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001789 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001790 FAIL;
1791 /* Each overlap value should be < prefix_len */
1792 for (i = 0; i < prefix_len; i++) {
1793 if (code[i] >= prefix_len)
1794 FAIL;
1795 }
1796 code += prefix_len;
1797 }
1798 /* Validate the charset */
1799 if (flags & SRE_INFO_CHARSET) {
1800 if (!_validate_charset(code, newcode-1))
1801 FAIL;
1802 if (newcode[-1] != SRE_OP_FAILURE)
1803 FAIL;
1804 code = newcode;
1805 }
1806 else if (code != newcode) {
1807 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1808 FAIL;
1809 }
1810 }
1811 break;
1812
1813 case SRE_OP_BRANCH:
1814 {
1815 SRE_CODE *target = NULL;
1816 for (;;) {
1817 GET_SKIP;
1818 if (skip == 0)
1819 break;
1820 /* Stop 2 before the end; we check the JUMP below */
1821 if (!_validate_inner(code, code+skip-3, groups))
1822 FAIL;
1823 code += skip-3;
1824 /* Check that it ends with a JUMP, and that each JUMP
1825 has the same target */
1826 GET_OP;
1827 if (op != SRE_OP_JUMP)
1828 FAIL;
1829 GET_SKIP;
1830 if (target == NULL)
1831 target = code+skip-1;
1832 else if (code+skip-1 != target)
1833 FAIL;
1834 }
1835 }
1836 break;
1837
1838 case SRE_OP_REPEAT_ONE:
1839 case SRE_OP_MIN_REPEAT_ONE:
1840 {
1841 SRE_CODE min, max;
1842 GET_SKIP;
1843 GET_ARG; min = arg;
1844 GET_ARG; max = arg;
1845 if (min > max)
1846 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001847 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001848 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001849 if (!_validate_inner(code, code+skip-4, groups))
1850 FAIL;
1851 code += skip-4;
1852 GET_OP;
1853 if (op != SRE_OP_SUCCESS)
1854 FAIL;
1855 }
1856 break;
1857
1858 case SRE_OP_REPEAT:
1859 {
1860 SRE_CODE min, max;
1861 GET_SKIP;
1862 GET_ARG; min = arg;
1863 GET_ARG; max = arg;
1864 if (min > max)
1865 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001866 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001867 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001868 if (!_validate_inner(code, code+skip-3, groups))
1869 FAIL;
1870 code += skip-3;
1871 GET_OP;
1872 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1873 FAIL;
1874 }
1875 break;
1876
1877 case SRE_OP_GROUPREF:
1878 case SRE_OP_GROUPREF_IGNORE:
1879 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001880 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001881 FAIL;
1882 break;
1883
1884 case SRE_OP_GROUPREF_EXISTS:
1885 /* The regex syntax for this is: '(?(group)then|else)', where
1886 'group' is either an integer group number or a group name,
1887 'then' and 'else' are sub-regexes, and 'else' is optional. */
1888 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001889 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001890 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001891 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001892 code--; /* The skip is relative to the first arg! */
1893 /* There are two possibilities here: if there is both a 'then'
1894 part and an 'else' part, the generated code looks like:
1895
1896 GROUPREF_EXISTS
1897 <group>
1898 <skipyes>
1899 ...then part...
1900 JUMP
1901 <skipno>
1902 (<skipyes> jumps here)
1903 ...else part...
1904 (<skipno> jumps here)
1905
1906 If there is only a 'then' part, it looks like:
1907
1908 GROUPREF_EXISTS
1909 <group>
1910 <skip>
1911 ...then part...
1912 (<skip> jumps here)
1913
1914 There is no direct way to decide which it is, and we don't want
1915 to allow arbitrary jumps anywhere in the code; so we just look
1916 for a JUMP opcode preceding our skip target.
1917 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001918 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001919 code[skip-3] == SRE_OP_JUMP)
1920 {
1921 VTRACE(("both then and else parts present\n"));
1922 if (!_validate_inner(code+1, code+skip-3, groups))
1923 FAIL;
1924 code += skip-2; /* Position after JUMP, at <skipno> */
1925 GET_SKIP;
1926 if (!_validate_inner(code, code+skip-1, groups))
1927 FAIL;
1928 code += skip-1;
1929 }
1930 else {
1931 VTRACE(("only a then part present\n"));
1932 if (!_validate_inner(code+1, code+skip-1, groups))
1933 FAIL;
1934 code += skip-1;
1935 }
1936 break;
1937
1938 case SRE_OP_ASSERT:
1939 case SRE_OP_ASSERT_NOT:
1940 GET_SKIP;
1941 GET_ARG; /* 0 for lookahead, width for lookbehind */
1942 code--; /* Back up over arg to simplify math below */
1943 if (arg & 0x80000000)
1944 FAIL; /* Width too large */
1945 /* Stop 1 before the end; we check the SUCCESS below */
1946 if (!_validate_inner(code+1, code+skip-2, groups))
1947 FAIL;
1948 code += skip-2;
1949 GET_OP;
1950 if (op != SRE_OP_SUCCESS)
1951 FAIL;
1952 break;
1953
1954 default:
1955 FAIL;
1956
1957 }
1958 }
1959
1960 VTRACE(("okay\n"));
1961 return 1;
1962}
1963
1964static int
1965_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1966{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001967 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1968 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001969 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001970 return _validate_inner(code, end-1, groups);
1971}
1972
1973static int
1974_validate(PatternObject *self)
1975{
1976 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1977 {
1978 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1979 return 0;
1980 }
1981 else
1982 VTRACE(("Success!\n"));
1983 return 1;
1984}
1985
1986/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001987/* match methods */
1988
1989static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001990match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001991{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001992 Py_XDECREF(self->regs);
1993 Py_XDECREF(self->string);
1994 Py_DECREF(self->pattern);
1995 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001996}
1997
1998static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001999match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002000{
Serhiy Storchaka25324972013-10-16 12:46:28 +03002001 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002002 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03002003 Py_buffer view;
2004 PyObject *result;
2005 void* ptr;
2006
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002007 if (index < 0 || index >= self->groups) {
2008 /* raise IndexError if we were given a bad group number */
2009 PyErr_SetString(
2010 PyExc_IndexError,
2011 "no such group"
2012 );
2013 return NULL;
2014 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002015
Fredrik Lundh6f013982000-07-03 18:44:21 +00002016 index *= 2;
2017
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002018 if (self->string == Py_None || self->mark[index] < 0) {
2019 /* return default value if the string or group is undefined */
2020 Py_INCREF(def);
2021 return def;
2022 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002023
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002024 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03002025 if (ptr == NULL)
2026 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002027 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03002028 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002029 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002030 PyBuffer_Release(&view);
2031 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002032}
2033
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002034static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002035match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002036{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002037 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002038
Guido van Rossumddefaf32007-01-14 03:31:43 +00002039 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002040 /* Default value */
2041 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002042
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03002043 if (PyIndex_Check(index)) {
2044 return PyNumber_AsSsize_t(index, NULL);
2045 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002046
Fredrik Lundh6f013982000-07-03 18:44:21 +00002047 i = -1;
2048
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002049 if (self->pattern->groupindex) {
2050 index = PyObject_GetItem(self->pattern->groupindex, index);
2051 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002052 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002053 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002054 Py_DECREF(index);
2055 } else
2056 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002057 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002058
2059 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002060}
2061
2062static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002063match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002064{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002065 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002066}
2067
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002068/*[clinic input]
2069_sre.SRE_Match.expand
2070
2071 template: object
2072
2073Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2074[clinic start generated code]*/
2075
2076static PyObject *
2077_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2078/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002079{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002080 /* delegate to Python code */
2081 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002082 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002083 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002084 );
2085}
2086
2087static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002088match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002089{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002090 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002091 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002092
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002093 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002094
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002095 switch (size) {
2096 case 0:
2097 result = match_getslice(self, Py_False, Py_None);
2098 break;
2099 case 1:
2100 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2101 break;
2102 default:
2103 /* fetch multiple items */
2104 result = PyTuple_New(size);
2105 if (!result)
2106 return NULL;
2107 for (i = 0; i < size; i++) {
2108 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002109 self, PyTuple_GET_ITEM(args, i), Py_None
2110 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002111 if (!item) {
2112 Py_DECREF(result);
2113 return NULL;
2114 }
2115 PyTuple_SET_ITEM(result, i, item);
2116 }
2117 break;
2118 }
2119 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002120}
2121
Eric V. Smith605bdae2016-09-11 08:55:43 -04002122static PyObject*
2123match_getitem(MatchObject* self, PyObject* name)
2124{
2125 return match_getslice(self, name, Py_None);
2126}
2127
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002128/*[clinic input]
2129_sre.SRE_Match.groups
2130
2131 default: object = None
2132 Is used for groups that did not participate in the match.
2133
2134Return a tuple containing all the subgroups of the match, from 1.
2135[clinic start generated code]*/
2136
2137static PyObject *
2138_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2139/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002140{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002141 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002142 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002143
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002144 result = PyTuple_New(self->groups-1);
2145 if (!result)
2146 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002147
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002148 for (index = 1; index < self->groups; index++) {
2149 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002150 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002151 if (!item) {
2152 Py_DECREF(result);
2153 return NULL;
2154 }
2155 PyTuple_SET_ITEM(result, index-1, item);
2156 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002157
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002158 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002159}
2160
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002161/*[clinic input]
2162_sre.SRE_Match.groupdict
2163
2164 default: object = None
2165 Is used for groups that did not participate in the match.
2166
2167Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2168[clinic start generated code]*/
2169
2170static PyObject *
2171_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2172/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002173{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002174 PyObject* result;
2175 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002176 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002177
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002178 result = PyDict_New();
2179 if (!result || !self->pattern->groupindex)
2180 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002182 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002183 if (!keys)
2184 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002185
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002186 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002187 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002188 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002189 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002190 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002191 if (!key)
2192 goto failed;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002193 value = match_getslice(self, key, default_value);
Benjamin Peterson3a27b082016-08-15 22:01:41 -07002194 if (!value)
Fredrik Lundh770617b2001-01-14 15:06:11 +00002195 goto failed;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002196 status = PyDict_SetItem(result, key, value);
2197 Py_DECREF(value);
2198 if (status < 0)
2199 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002200 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002201
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002202 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002203
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002204 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002205
2206failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002207 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002208 Py_DECREF(result);
2209 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002210}
2211
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002212/*[clinic input]
2213_sre.SRE_Match.start -> Py_ssize_t
2214
2215 group: object(c_default="NULL") = 0
2216 /
2217
2218Return index of the start of the substring matched by group.
2219[clinic start generated code]*/
2220
2221static Py_ssize_t
2222_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2223/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002224{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002225 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002226
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002227 if (index < 0 || index >= self->groups) {
2228 PyErr_SetString(
2229 PyExc_IndexError,
2230 "no such group"
2231 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002232 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002233 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002234
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002235 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002236 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002237}
2238
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002239/*[clinic input]
2240_sre.SRE_Match.end -> Py_ssize_t
2241
2242 group: object(c_default="NULL") = 0
2243 /
2244
2245Return index of the end of the substring matched by group.
2246[clinic start generated code]*/
2247
2248static Py_ssize_t
2249_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2250/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002251{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002252 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002253
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002254 if (index < 0 || index >= self->groups) {
2255 PyErr_SetString(
2256 PyExc_IndexError,
2257 "no such group"
2258 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002259 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002260 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002261
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002262 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002263 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002264}
2265
2266LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002267_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002268{
2269 PyObject* pair;
2270 PyObject* item;
2271
2272 pair = PyTuple_New(2);
2273 if (!pair)
2274 return NULL;
2275
Christian Heimes217cfd12007-12-02 14:31:20 +00002276 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002277 if (!item)
2278 goto error;
2279 PyTuple_SET_ITEM(pair, 0, item);
2280
Christian Heimes217cfd12007-12-02 14:31:20 +00002281 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002282 if (!item)
2283 goto error;
2284 PyTuple_SET_ITEM(pair, 1, item);
2285
2286 return pair;
2287
2288 error:
2289 Py_DECREF(pair);
2290 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002291}
2292
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002293/*[clinic input]
2294_sre.SRE_Match.span
2295
2296 group: object(c_default="NULL") = 0
2297 /
2298
2299For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2300[clinic start generated code]*/
2301
2302static PyObject *
2303_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2304/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002305{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002306 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002307
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002308 if (index < 0 || index >= self->groups) {
2309 PyErr_SetString(
2310 PyExc_IndexError,
2311 "no such group"
2312 );
2313 return NULL;
2314 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002315
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002316 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002317 return _pair(self->mark[index*2], self->mark[index*2+1]);
2318}
2319
2320static PyObject*
2321match_regs(MatchObject* self)
2322{
2323 PyObject* regs;
2324 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002325 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002326
2327 regs = PyTuple_New(self->groups);
2328 if (!regs)
2329 return NULL;
2330
2331 for (index = 0; index < self->groups; index++) {
2332 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2333 if (!item) {
2334 Py_DECREF(regs);
2335 return NULL;
2336 }
2337 PyTuple_SET_ITEM(regs, index, item);
2338 }
2339
2340 Py_INCREF(regs);
2341 self->regs = regs;
2342
2343 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002344}
2345
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002346/*[clinic input]
2347_sre.SRE_Match.__copy__
2348
2349[clinic start generated code]*/
2350
2351static PyObject *
2352_sre_SRE_Match___copy___impl(MatchObject *self)
2353/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002354{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002355#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002356 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002357 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002358
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002359 slots = 2 * (self->pattern->groups+1);
2360
2361 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2362 if (!copy)
2363 return NULL;
2364
2365 /* this value a constant, but any compiler should be able to
2366 figure that out all by itself */
2367 offset = offsetof(MatchObject, string);
2368
2369 Py_XINCREF(self->pattern);
2370 Py_XINCREF(self->string);
2371 Py_XINCREF(self->regs);
2372
2373 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002374 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002375
2376 return (PyObject*) copy;
2377#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002378 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002379 return NULL;
2380#endif
2381}
2382
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002383/*[clinic input]
2384_sre.SRE_Match.__deepcopy__
2385
2386 memo: object
2387
2388[clinic start generated code]*/
2389
2390static PyObject *
2391_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2392/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002393{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002394#ifdef USE_BUILTIN_COPY
2395 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002396
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002397 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002398 if (!copy)
2399 return NULL;
2400
2401 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2402 !deepcopy(&copy->string, memo) ||
2403 !deepcopy(&copy->regs, memo)) {
2404 Py_DECREF(copy);
2405 return NULL;
2406 }
2407
2408#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002409 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2410 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002411#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002412}
2413
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002414PyDoc_STRVAR(match_doc,
2415"The result of re.match() and re.search().\n\
2416Match objects always have a boolean value of True.");
2417
2418PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002419"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002420 Return subgroup(s) of the match by indices or names.\n\
2421 For 0 returns the entire match.");
2422
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002423static PyObject *
2424match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002425{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002426 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002427 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002428 Py_INCREF(Py_None);
2429 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002430}
2431
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002432static PyObject *
2433match_lastgroup_get(MatchObject *self)
2434{
2435 if (self->pattern->indexgroup && self->lastindex >= 0) {
2436 PyObject* result = PySequence_GetItem(
2437 self->pattern->indexgroup, self->lastindex
2438 );
2439 if (result)
2440 return result;
2441 PyErr_Clear();
2442 }
2443 Py_INCREF(Py_None);
2444 return Py_None;
2445}
2446
2447static PyObject *
2448match_regs_get(MatchObject *self)
2449{
2450 if (self->regs) {
2451 Py_INCREF(self->regs);
2452 return self->regs;
2453 } else
2454 return match_regs(self);
2455}
2456
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002457static PyObject *
2458match_repr(MatchObject *self)
2459{
2460 PyObject *result;
2461 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2462 if (group0 == NULL)
2463 return NULL;
2464 result = PyUnicode_FromFormat(
2465 "<%s object; span=(%d, %d), match=%.50R>",
2466 Py_TYPE(self)->tp_name,
2467 self->mark[0], self->mark[1], group0);
2468 Py_DECREF(group0);
2469 return result;
2470}
2471
2472
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002473static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002474pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002475{
2476 /* create match object (from state object) */
2477
2478 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002479 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002480 char* base;
2481 int n;
2482
2483 if (status > 0) {
2484
2485 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002486 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002487 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2488 2*(pattern->groups+1));
2489 if (!match)
2490 return NULL;
2491
2492 Py_INCREF(pattern);
2493 match->pattern = pattern;
2494
2495 Py_INCREF(state->string);
2496 match->string = state->string;
2497
2498 match->regs = NULL;
2499 match->groups = pattern->groups+1;
2500
2501 /* fill in group slices */
2502
2503 base = (char*) state->beginning;
2504 n = state->charsize;
2505
2506 match->mark[0] = ((char*) state->start - base) / n;
2507 match->mark[1] = ((char*) state->ptr - base) / n;
2508
2509 for (i = j = 0; i < pattern->groups; i++, j+=2)
2510 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2511 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2512 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2513 } else
2514 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2515
2516 match->pos = state->pos;
2517 match->endpos = state->endpos;
2518
2519 match->lastindex = state->lastindex;
2520
2521 return (PyObject*) match;
2522
2523 } else if (status == 0) {
2524
2525 /* no match */
2526 Py_INCREF(Py_None);
2527 return Py_None;
2528
2529 }
2530
2531 /* internal error */
2532 pattern_error(status);
2533 return NULL;
2534}
2535
2536
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002537/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002538/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002539
2540static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002541scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002542{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002543 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002544 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002545 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002546}
2547
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002548/*[clinic input]
2549_sre.SRE_Scanner.match
2550
2551[clinic start generated code]*/
2552
2553static PyObject *
2554_sre_SRE_Scanner_match_impl(ScannerObject *self)
2555/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002556{
2557 SRE_STATE* state = &self->state;
2558 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002559 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002560
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002561 if (state->start == NULL)
2562 Py_RETURN_NONE;
2563
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002564 state_reset(state);
2565
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002566 state->ptr = state->start;
2567
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002568 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002569 if (PyErr_Occurred())
2570 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002571
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002572 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002573 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002574
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002575 if (status == 0)
2576 state->start = NULL;
2577 else if (state->ptr != state->start)
2578 state->start = state->ptr;
2579 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002580 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002581 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002582 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002583
2584 return match;
2585}
2586
2587
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002588/*[clinic input]
2589_sre.SRE_Scanner.search
2590
2591[clinic start generated code]*/
2592
2593static PyObject *
2594_sre_SRE_Scanner_search_impl(ScannerObject *self)
2595/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002596{
2597 SRE_STATE* state = &self->state;
2598 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002599 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002600
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002601 if (state->start == NULL)
2602 Py_RETURN_NONE;
2603
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002604 state_reset(state);
2605
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002606 state->ptr = state->start;
2607
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002608 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002609 if (PyErr_Occurred())
2610 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002611
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002612 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002613 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002614
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002615 if (status == 0)
2616 state->start = NULL;
2617 else if (state->ptr != state->start)
2618 state->start = state->ptr;
2619 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002620 state->start = (void*) ((char*) state->ptr + state->charsize);
2621 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002622 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002623
2624 return match;
2625}
2626
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002627static PyObject *
2628pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002629{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002630 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002631
2632 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002633 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2634 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002635 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002636 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002637
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002638 /* create search state object */
2639 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2640 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002641 return NULL;
2642 }
2643
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002644 Py_INCREF(self);
2645 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002646
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002647 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002648}
2649
Victor Stinnerb44fb122016-11-21 16:35:08 +01002650static Py_hash_t
2651pattern_hash(PatternObject *self)
2652{
2653 Py_hash_t hash, hash2;
2654
2655 hash = PyObject_Hash(self->pattern);
2656 if (hash == -1) {
2657 return -1;
2658 }
2659
2660 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2661 hash ^= hash2;
2662
2663 hash ^= self->flags;
2664 hash ^= self->isbytes;
2665 hash ^= self->codesize;
2666
2667 if (hash == -1) {
2668 hash = -2;
2669 }
2670 return hash;
2671}
2672
2673static PyObject*
2674pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2675{
2676 PatternObject *left, *right;
2677 int cmp;
2678
2679 if (op != Py_EQ && op != Py_NE) {
2680 Py_RETURN_NOTIMPLEMENTED;
2681 }
2682
2683 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2684 Py_RETURN_NOTIMPLEMENTED;
2685 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002686
2687 if (lefto == righto) {
2688 /* a pattern is equal to itself */
2689 return PyBool_FromLong(op == Py_EQ);
2690 }
2691
Victor Stinnerb44fb122016-11-21 16:35:08 +01002692 left = (PatternObject *)lefto;
2693 right = (PatternObject *)righto;
2694
2695 cmp = (left->flags == right->flags
2696 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002697 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002698 if (cmp) {
2699 /* Compare the code and the pattern because the same pattern can
2700 produce different codes depending on the locale used to compile the
2701 pattern when the re.LOCALE flag is used. Don't compare groups,
2702 indexgroup nor groupindex: they are derivated from the pattern. */
2703 cmp = (memcmp(left->code, right->code,
2704 sizeof(left->code[0]) * left->codesize) == 0);
2705 }
2706 if (cmp) {
2707 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2708 Py_EQ);
2709 if (cmp < 0) {
2710 return NULL;
2711 }
2712 }
2713 if (op == Py_NE) {
2714 cmp = !cmp;
2715 }
2716 return PyBool_FromLong(cmp);
2717}
2718
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002719#include "clinic/_sre.c.h"
2720
2721static PyMethodDef pattern_methods[] = {
2722 _SRE_SRE_PATTERN_MATCH_METHODDEF
2723 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2724 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2725 _SRE_SRE_PATTERN_SUB_METHODDEF
2726 _SRE_SRE_PATTERN_SUBN_METHODDEF
2727 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2728 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2729 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2730 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2731 _SRE_SRE_PATTERN___COPY___METHODDEF
2732 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2733 {NULL, NULL}
2734};
2735
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002736static PyGetSetDef pattern_getset[] = {
2737 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2738 "A dictionary mapping group names to group numbers."},
2739 {NULL} /* Sentinel */
2740};
2741
2742#define PAT_OFF(x) offsetof(PatternObject, x)
2743static PyMemberDef pattern_members[] = {
2744 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2745 {"flags", T_INT, PAT_OFF(flags), READONLY},
2746 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2747 {NULL} /* Sentinel */
2748};
2749
2750static PyTypeObject Pattern_Type = {
2751 PyVarObject_HEAD_INIT(NULL, 0)
2752 "_" SRE_MODULE ".SRE_Pattern",
2753 sizeof(PatternObject), sizeof(SRE_CODE),
2754 (destructor)pattern_dealloc, /* tp_dealloc */
2755 0, /* tp_print */
2756 0, /* tp_getattr */
2757 0, /* tp_setattr */
2758 0, /* tp_reserved */
2759 (reprfunc)pattern_repr, /* tp_repr */
2760 0, /* tp_as_number */
2761 0, /* tp_as_sequence */
2762 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002763 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002764 0, /* tp_call */
2765 0, /* tp_str */
2766 0, /* tp_getattro */
2767 0, /* tp_setattro */
2768 0, /* tp_as_buffer */
2769 Py_TPFLAGS_DEFAULT, /* tp_flags */
2770 pattern_doc, /* tp_doc */
2771 0, /* tp_traverse */
2772 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002773 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002774 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2775 0, /* tp_iter */
2776 0, /* tp_iternext */
2777 pattern_methods, /* tp_methods */
2778 pattern_members, /* tp_members */
2779 pattern_getset, /* tp_getset */
2780};
2781
Eric V. Smith605bdae2016-09-11 08:55:43 -04002782/* Match objects do not support length or assignment, but do support
2783 __getitem__. */
2784static PyMappingMethods match_as_mapping = {
2785 NULL,
2786 (binaryfunc)match_getitem,
2787 NULL
2788};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002789
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002790static PyMethodDef match_methods[] = {
2791 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2792 _SRE_SRE_MATCH_START_METHODDEF
2793 _SRE_SRE_MATCH_END_METHODDEF
2794 _SRE_SRE_MATCH_SPAN_METHODDEF
2795 _SRE_SRE_MATCH_GROUPS_METHODDEF
2796 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2797 _SRE_SRE_MATCH_EXPAND_METHODDEF
2798 _SRE_SRE_MATCH___COPY___METHODDEF
2799 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2800 {NULL, NULL}
2801};
2802
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002803static PyGetSetDef match_getset[] = {
2804 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2805 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2806 {"regs", (getter)match_regs_get, (setter)NULL},
2807 {NULL}
2808};
2809
2810#define MATCH_OFF(x) offsetof(MatchObject, x)
2811static PyMemberDef match_members[] = {
2812 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2813 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2814 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2815 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2816 {NULL}
2817};
2818
2819/* FIXME: implement setattr("string", None) as a special case (to
2820 detach the associated string, if any */
2821
2822static PyTypeObject Match_Type = {
2823 PyVarObject_HEAD_INIT(NULL,0)
2824 "_" SRE_MODULE ".SRE_Match",
2825 sizeof(MatchObject), sizeof(Py_ssize_t),
2826 (destructor)match_dealloc, /* tp_dealloc */
2827 0, /* tp_print */
2828 0, /* tp_getattr */
2829 0, /* tp_setattr */
2830 0, /* tp_reserved */
2831 (reprfunc)match_repr, /* tp_repr */
2832 0, /* tp_as_number */
2833 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002834 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002835 0, /* tp_hash */
2836 0, /* tp_call */
2837 0, /* tp_str */
2838 0, /* tp_getattro */
2839 0, /* tp_setattro */
2840 0, /* tp_as_buffer */
2841 Py_TPFLAGS_DEFAULT, /* tp_flags */
2842 match_doc, /* tp_doc */
2843 0, /* tp_traverse */
2844 0, /* tp_clear */
2845 0, /* tp_richcompare */
2846 0, /* tp_weaklistoffset */
2847 0, /* tp_iter */
2848 0, /* tp_iternext */
2849 match_methods, /* tp_methods */
2850 match_members, /* tp_members */
2851 match_getset, /* tp_getset */
2852};
2853
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002854static PyMethodDef scanner_methods[] = {
2855 _SRE_SRE_SCANNER_MATCH_METHODDEF
2856 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2857 {NULL, NULL}
2858};
2859
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002860#define SCAN_OFF(x) offsetof(ScannerObject, x)
2861static PyMemberDef scanner_members[] = {
2862 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2863 {NULL} /* Sentinel */
2864};
2865
2866static PyTypeObject Scanner_Type = {
2867 PyVarObject_HEAD_INIT(NULL, 0)
2868 "_" SRE_MODULE ".SRE_Scanner",
2869 sizeof(ScannerObject), 0,
2870 (destructor)scanner_dealloc,/* tp_dealloc */
2871 0, /* tp_print */
2872 0, /* tp_getattr */
2873 0, /* tp_setattr */
2874 0, /* tp_reserved */
2875 0, /* tp_repr */
2876 0, /* tp_as_number */
2877 0, /* tp_as_sequence */
2878 0, /* tp_as_mapping */
2879 0, /* tp_hash */
2880 0, /* tp_call */
2881 0, /* tp_str */
2882 0, /* tp_getattro */
2883 0, /* tp_setattro */
2884 0, /* tp_as_buffer */
2885 Py_TPFLAGS_DEFAULT, /* tp_flags */
2886 0, /* tp_doc */
2887 0, /* tp_traverse */
2888 0, /* tp_clear */
2889 0, /* tp_richcompare */
2890 0, /* tp_weaklistoffset */
2891 0, /* tp_iter */
2892 0, /* tp_iternext */
2893 scanner_methods, /* tp_methods */
2894 scanner_members, /* tp_members */
2895 0, /* tp_getset */
2896};
2897
Guido van Rossumb700df92000-03-31 14:59:30 +00002898static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002899 _SRE_COMPILE_METHODDEF
2900 _SRE_GETCODESIZE_METHODDEF
2901 _SRE_GETLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002902 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002903};
2904
Martin v. Löwis1a214512008-06-11 05:26:20 +00002905static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002906 PyModuleDef_HEAD_INIT,
2907 "_" SRE_MODULE,
2908 NULL,
2909 -1,
2910 _functions,
2911 NULL,
2912 NULL,
2913 NULL,
2914 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002915};
2916
2917PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002918{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002919 PyObject* m;
2920 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002921 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002922
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002923 /* Patch object types */
2924 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2925 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002926 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002927
Martin v. Löwis1a214512008-06-11 05:26:20 +00002928 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002929 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002930 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002931 d = PyModule_GetDict(m);
2932
Christian Heimes217cfd12007-12-02 14:31:20 +00002933 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002934 if (x) {
2935 PyDict_SetItemString(d, "MAGIC", x);
2936 Py_DECREF(x);
2937 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002938
Christian Heimes217cfd12007-12-02 14:31:20 +00002939 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002940 if (x) {
2941 PyDict_SetItemString(d, "CODESIZE", x);
2942 Py_DECREF(x);
2943 }
2944
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002945 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2946 if (x) {
2947 PyDict_SetItemString(d, "MAXREPEAT", x);
2948 Py_DECREF(x);
2949 }
2950
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002951 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2952 if (x) {
2953 PyDict_SetItemString(d, "MAXGROUPS", x);
2954 Py_DECREF(x);
2955 }
2956
Neal Norwitzfe537132007-08-26 03:55:15 +00002957 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002958 if (x) {
2959 PyDict_SetItemString(d, "copyright", x);
2960 Py_DECREF(x);
2961 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002962 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002963}
2964
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002965/* vim:ts=4:sw=4:et
2966*/