blob: 1cef7d02d2a0a26d0d77c5e50fce9c0bc2fdf4b7 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */

/* Copyright banner string for this engine. */
static const char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";

Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000065/* enables copy/deepcopy handling (work in progress) */
66#undef USE_BUILTIN_COPY
67
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000068/* -------------------------------------------------------------------- */
69
Fredrik Lundh80946112000-06-29 18:03:25 +000070#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000071#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000072#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000073/* fastest possible local call under MSVC */
74#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000075#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000076#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000077#else
78#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000079#endif
80
81/* error codes */
82#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000083#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000084#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000085#define SRE_ERROR_MEMORY -9 /* out of memory */
Christian Heimes2380ac72008-01-09 00:17:24 +000086#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000087
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000088#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000089#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000090#else
91#define TRACE(v)
92#endif
93
/* -------------------------------------------------------------------- */
/* search engine state */

/* ASCII-only character predicates: any code >= 128 never matches
 * (SRE_IS_LINEBREAK only recognizes '\n'). */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch)\
    ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))

/* Lowercase `ch` using ASCII rules; codes >= 128 are returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
}

/* Uppercase `ch` using ASCII rules; codes >= 128 are returned unchanged. */
static unsigned int sre_upper(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOUPPER(ch) : ch);
}

/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Lowercase `ch` with the C library's locale-aware tolower();
 * codes >= 256 are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}

/* Uppercase `ch` with the C library's locale-aware toupper();
 * codes >= 256 are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
}

/* unicode-specific character predicates */

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')

/* Lowercase `ch` via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER(ch);
}

/* Uppercase `ch` via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOUPPER(ch);
}

Guido van Rossumb700df92000-03-31 14:59:30 +0000152LOCAL(int)
153sre_category(SRE_CODE category, unsigned int ch)
154{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000155 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000157 case SRE_CATEGORY_DIGIT:
158 return SRE_IS_DIGIT(ch);
159 case SRE_CATEGORY_NOT_DIGIT:
160 return !SRE_IS_DIGIT(ch);
161 case SRE_CATEGORY_SPACE:
162 return SRE_IS_SPACE(ch);
163 case SRE_CATEGORY_NOT_SPACE:
164 return !SRE_IS_SPACE(ch);
165 case SRE_CATEGORY_WORD:
166 return SRE_IS_WORD(ch);
167 case SRE_CATEGORY_NOT_WORD:
168 return !SRE_IS_WORD(ch);
169 case SRE_CATEGORY_LINEBREAK:
170 return SRE_IS_LINEBREAK(ch);
171 case SRE_CATEGORY_NOT_LINEBREAK:
172 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000174 case SRE_CATEGORY_LOC_WORD:
175 return SRE_LOC_IS_WORD(ch);
176 case SRE_CATEGORY_LOC_NOT_WORD:
177 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000178
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000179 case SRE_CATEGORY_UNI_DIGIT:
180 return SRE_UNI_IS_DIGIT(ch);
181 case SRE_CATEGORY_UNI_NOT_DIGIT:
182 return !SRE_UNI_IS_DIGIT(ch);
183 case SRE_CATEGORY_UNI_SPACE:
184 return SRE_UNI_IS_SPACE(ch);
185 case SRE_CATEGORY_UNI_NOT_SPACE:
186 return !SRE_UNI_IS_SPACE(ch);
187 case SRE_CATEGORY_UNI_WORD:
188 return SRE_UNI_IS_WORD(ch);
189 case SRE_CATEGORY_UNI_NOT_WORD:
190 return !SRE_UNI_IS_WORD(ch);
191 case SRE_CATEGORY_UNI_LINEBREAK:
192 return SRE_UNI_IS_LINEBREAK(ch);
193 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
194 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 }
196 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000197}
198
199/* helpers */
200
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000201static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000202data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000203{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000204 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000205 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000206 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000207 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000208 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000209}
210
211static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000212data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000213{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000214 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000215 minsize = state->data_stack_base+size;
216 cursize = state->data_stack_size;
217 if (cursize < minsize) {
218 void* stack;
219 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300220 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000222 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000223 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000224 return SRE_ERROR_MEMORY;
225 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000226 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000227 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000228 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000229 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000230}
231
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000232/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000233
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300234#define SRE_CHAR Py_UCS1
235#define SIZEOF_SRE_CHAR 1
236#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300237#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000238
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300239/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000240
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300241#define SRE_CHAR Py_UCS2
242#define SIZEOF_SRE_CHAR 2
243#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300244#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000245
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300246/* generate 32-bit unicode version */
247
248#define SRE_CHAR Py_UCS4
249#define SIZEOF_SRE_CHAR 4
250#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300251#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000252
253/* -------------------------------------------------------------------- */
254/* factories and destructors */
255
256/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100257static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300258static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000259
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300260
261/*[clinic input]
262module _sre
263class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
264class _sre.SRE_Match "MatchObject *" "&Match_Type"
265class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
266[clinic start generated code]*/
267/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
268
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700269static PyTypeObject Pattern_Type;
270static PyTypeObject Match_Type;
271static PyTypeObject Scanner_Type;
272
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300273/*[clinic input]
274_sre.getcodesize -> int
275[clinic start generated code]*/
276
277static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300278_sre_getcodesize_impl(PyObject *module)
279/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000280{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300281 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000282}
283
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300284/*[clinic input]
285_sre.getlower -> int
286
287 character: int
288 flags: int
289 /
290
291[clinic start generated code]*/
292
293static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300294_sre_getlower_impl(PyObject *module, int character, int flags)
295/*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000296{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000297 if (flags & SRE_FLAG_LOCALE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300298 return sre_lower_locale(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000299 if (flags & SRE_FLAG_UNICODE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300300 return sre_lower_unicode(character);
301 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000302}
303
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000304LOCAL(void)
305state_reset(SRE_STATE* state)
306{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000308 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000309
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000310 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000311 state->lastindex = -1;
312
313 state->repeat = NULL;
314
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000315 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000316}
317
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000318static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300320 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600321 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000322{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000323 /* given a python object, return a data pointer, a length (in
324 characters), and a character size. return NULL if the object
325 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000326
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000327 /* Unicode objects do not support the buffer API. So, get the data
328 directly instead. */
329 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200330 if (PyUnicode_READY(string) == -1)
331 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200332 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200333 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300334 *p_isbytes = 0;
335 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000336 }
337
Victor Stinner0058b862011-09-29 03:27:47 +0200338 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300339 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200340 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300341 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000342 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 *p_length = view->len;
345 *p_charsize = 1;
346 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000347
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300348 if (view->buf == NULL) {
349 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
350 PyBuffer_Release(view);
351 view->buf = NULL;
352 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300354 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000355}
356
357LOCAL(PyObject*)
358state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000359 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000360{
361 /* prepare state object */
362
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000363 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300364 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000365 void* ptr;
366
367 memset(state, 0, sizeof(SRE_STATE));
368
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300369 state->mark = PyMem_New(void *, pattern->groups * 2);
370 if (!state->mark) {
371 PyErr_NoMemory();
372 goto err;
373 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000374 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000375 state->lastindex = -1;
376
Benjamin Petersone48944b2012-03-07 14:50:25 -0600377 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300378 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000379 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600380 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000381
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600383 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200384 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600385 goto err;
386 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300387 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600388 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200389 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600390 goto err;
391 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 /* adjust boundaries */
394 if (start < 0)
395 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000396 else if (start > length)
397 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 if (end < 0)
400 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000401 else if (end > length)
402 end = length;
403
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300404 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000405 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 state->start = (void*) ((char*) ptr + start * state->charsize);
410 state->end = (void*) ((char*) ptr + end * state->charsize);
411
412 Py_INCREF(string);
413 state->string = string;
414 state->pos = start;
415 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000416
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200417 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000418 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200419 state->upper = sre_upper_locale;
420 }
421 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000422 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200423 state->upper = sre_upper_unicode;
424 }
425 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000426 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200427 state->upper = sre_upper;
428 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600431 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300432 PyMem_Del(state->mark);
433 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600434 if (state->buffer.buf)
435 PyBuffer_Release(&state->buffer);
436 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000437}
438
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000439LOCAL(void)
440state_fini(SRE_STATE* state)
441{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600442 if (state->buffer.buf)
443 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000445 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300446 PyMem_Del(state->mark);
447 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000448}
449
/* calculate offset from start of string */
/* (byte distance from state->beginning divided by the character size) */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)

Fredrik Lundh75f2d672000-06-29 11:34:28 +0000454LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300455getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300456 PyObject* string, Py_ssize_t start, Py_ssize_t end)
457{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300458 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300459 if (PyBytes_CheckExact(string) &&
460 start == 0 && end == PyBytes_GET_SIZE(string)) {
461 Py_INCREF(string);
462 return string;
463 }
464 return PyBytes_FromStringAndSize(
465 (const char *)ptr + start, end - start);
466 }
467 else {
468 return PyUnicode_Substring(string, start, end);
469 }
470}
471
472LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000473state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000474{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000475 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000476
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000477 index = (index - 1) * 2;
478
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000479 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000480 if (empty)
481 /* want empty string */
482 i = j = 0;
483 else {
484 Py_INCREF(Py_None);
485 return Py_None;
486 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000487 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000488 i = STATE_OFFSET(state, state->mark[index]);
489 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000490 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000491
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300492 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000493}
494
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000495static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100496pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000497{
498 switch (status) {
499 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400500 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000501 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400502 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000503 "maximum recursion limit exceeded"
504 );
505 break;
506 case SRE_ERROR_MEMORY:
507 PyErr_NoMemory();
508 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000509 case SRE_ERROR_INTERRUPTED:
510 /* An exception has already been raised, so let it fly */
511 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000512 default:
513 /* other error codes indicate compiler/engine bugs */
514 PyErr_SetString(
515 PyExc_RuntimeError,
516 "internal error in regular expression engine"
517 );
518 }
519}
520
Guido van Rossumb700df92000-03-31 14:59:30 +0000521static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000522pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000523{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000524 if (self->weakreflist != NULL)
525 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000526 Py_XDECREF(self->pattern);
527 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000528 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000529 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000530}
531
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300532LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300533sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300534{
535 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300536 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300537 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300538 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300539 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300540 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300541}
542
543LOCAL(Py_ssize_t)
544sre_search(SRE_STATE* state, SRE_CODE* pattern)
545{
546 if (state->charsize == 1)
547 return sre_ucs1_search(state, pattern);
548 if (state->charsize == 2)
549 return sre_ucs2_search(state, pattern);
550 assert(state->charsize == 4);
551 return sre_ucs4_search(state, pattern);
552}
553
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300554/*[clinic input]
555_sre.SRE_Pattern.match
556
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200557 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300558 pos: Py_ssize_t = 0
559 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300560
561Matches zero or more characters at the beginning of the string.
562[clinic start generated code]*/
563
Larry Hastings16c51912014-01-07 11:53:01 -0800564static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300565_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200566 Py_ssize_t pos, Py_ssize_t endpos)
567/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800568{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000569 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100570 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300571 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000572
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300573 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000574 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000575
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000576 state.ptr = state.start;
577
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000578 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
579
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300580 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000581
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000582 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300583 if (PyErr_Occurred()) {
584 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000585 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300586 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000587
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300588 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000589 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300590 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000591}
592
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300593/*[clinic input]
594_sre.SRE_Pattern.fullmatch
595
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200596 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300597 pos: Py_ssize_t = 0
598 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300599
600Matches against all of the string
601[clinic start generated code]*/
602
603static PyObject *
604_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200605 Py_ssize_t pos, Py_ssize_t endpos)
606/*[clinic end generated code: output=5833c47782a35f4a input=a6f640614aaefceb]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200607{
608 SRE_STATE state;
609 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300610 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200611
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300612 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200613 return NULL;
614
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200615 state.ptr = state.start;
616
617 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
618
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300619 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200620
621 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300622 if (PyErr_Occurred()) {
623 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200624 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300625 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200626
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300627 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200628 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300629 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200630}
631
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300632/*[clinic input]
633_sre.SRE_Pattern.search
634
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200635 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300636 pos: Py_ssize_t = 0
637 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300638
639Scan through string looking for a match, and return a corresponding match object instance.
640
641Return None if no position in the string matches.
642[clinic start generated code]*/
643
644static PyObject *
645_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200646 Py_ssize_t pos, Py_ssize_t endpos)
647/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000648{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000649 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100650 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300651 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000652
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300653 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000654 return NULL;
655
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000656 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
657
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300658 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000659
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000660 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
661
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300662 if (PyErr_Occurred()) {
663 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000664 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300665 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000666
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300667 match = pattern_new_match(self, &state, status);
668 state_fini(&state);
669 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000670}
671
672static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200673call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000674{
675 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000676 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000677 PyObject* func;
678 PyObject* result;
679
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000680 if (!args)
681 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000682 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000683 if (!name)
684 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000685 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000686 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000687 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000688 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000689 func = PyObject_GetAttrString(mod, function);
690 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000691 if (!func)
692 return NULL;
693 result = PyObject_CallObject(func, args);
694 Py_DECREF(func);
695 Py_DECREF(args);
696 return result;
697}
698
#ifdef USE_BUILTIN_COPY
/*
 * Replace *object with the result of copy.deepcopy(*object, memo).
 *
 * Returns 1 on success and 0 on failure; on failure *object is left
 * untouched.
 */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* a NULL tuple is passed straight through: call() checks for it */
    PyObject* args = PyTuple_Pack(2, *object, memo);
    PyObject* duplicate = call("copy", "deepcopy", args);

    if (duplicate == NULL)
        return 0;

    /* install the copy and drop the reference to the previous value */
    Py_SETREF(*object, duplicate);

    return 1; /* success */
}
#endif
717
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300718/*[clinic input]
719_sre.SRE_Pattern.findall
720
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200721 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300722 pos: Py_ssize_t = 0
723 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300724
725Return a list of all non-overlapping matches of pattern in string.
726[clinic start generated code]*/
727
728static PyObject *
729_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200730 Py_ssize_t pos, Py_ssize_t endpos)
731/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000732{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000733 SRE_STATE state;
734 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100735 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000736 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000737
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300738 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000740
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000741 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000742 if (!list) {
743 state_fini(&state);
744 return NULL;
745 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000746
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000747 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000748
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000749 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000750
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000751 state_reset(&state);
752
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000753 state.ptr = state.start;
754
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300755 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300756 if (PyErr_Occurred())
757 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000758
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000759 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000760 if (status == 0)
761 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000762 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000763 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000764 }
Tim Peters3d563502006-01-21 02:47:53 +0000765
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000766 /* don't bother to build a match object */
767 switch (self->groups) {
768 case 0:
769 b = STATE_OFFSET(&state, state.start);
770 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300771 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300772 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000773 if (!item)
774 goto error;
775 break;
776 case 1:
777 item = state_getslice(&state, 1, string, 1);
778 if (!item)
779 goto error;
780 break;
781 default:
782 item = PyTuple_New(self->groups);
783 if (!item)
784 goto error;
785 for (i = 0; i < self->groups; i++) {
786 PyObject* o = state_getslice(&state, i+1, string, 1);
787 if (!o) {
788 Py_DECREF(item);
789 goto error;
790 }
791 PyTuple_SET_ITEM(item, i, o);
792 }
793 break;
794 }
795
796 status = PyList_Append(list, item);
797 Py_DECREF(item);
798 if (status < 0)
799 goto error;
800
801 if (state.ptr == state.start)
802 state.start = (void*) ((char*) state.ptr + state.charsize);
803 else
804 state.start = state.ptr;
805
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000806 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000807
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000808 state_fini(&state);
809 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000810
811error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000812 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000813 state_fini(&state);
814 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000815
Guido van Rossumb700df92000-03-31 14:59:30 +0000816}
817
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300818/*[clinic input]
819_sre.SRE_Pattern.finditer
820
821 string: object
822 pos: Py_ssize_t = 0
823 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
824
825Return an iterator over all non-overlapping matches for the RE pattern in string.
826
827For each match, the iterator returns a match object.
828[clinic start generated code]*/
829
830static PyObject *
831_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
832 Py_ssize_t pos, Py_ssize_t endpos)
833/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000834{
835 PyObject* scanner;
836 PyObject* search;
837 PyObject* iterator;
838
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300839 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000840 if (!scanner)
841 return NULL;
842
843 search = PyObject_GetAttrString(scanner, "search");
844 Py_DECREF(scanner);
845 if (!search)
846 return NULL;
847
848 iterator = PyCallIter_New(search, Py_None);
849 Py_DECREF(search);
850
851 return iterator;
852}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000853
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300854/*[clinic input]
855_sre.SRE_Pattern.scanner
856
857 string: object
858 pos: Py_ssize_t = 0
859 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
860
861[clinic start generated code]*/
862
863static PyObject *
864_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
865 Py_ssize_t pos, Py_ssize_t endpos)
866/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
867{
868 return pattern_scanner(self, string, pos, endpos);
869}
870
871/*[clinic input]
872_sre.SRE_Pattern.split
873
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200874 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300875 maxsplit: Py_ssize_t = 0
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300876
877Split string by the occurrences of pattern.
878[clinic start generated code]*/
879
880static PyObject *
881_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200882 Py_ssize_t maxsplit)
883/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000884{
885 SRE_STATE state;
886 PyObject* list;
887 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100888 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000889 Py_ssize_t n;
890 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000891 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000892
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200893 assert(self->codesize != 0);
894 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
895 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
896 PyErr_SetString(PyExc_ValueError,
897 "split() requires a non-empty pattern match.");
898 return NULL;
899 }
900 if (PyErr_WarnEx(PyExc_FutureWarning,
901 "split() requires a non-empty pattern match.",
902 1) < 0)
903 return NULL;
904 }
905
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300906 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000907 return NULL;
908
909 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000910 if (!list) {
911 state_fini(&state);
912 return NULL;
913 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000914
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000915 n = 0;
916 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000917
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000918 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000919
920 state_reset(&state);
921
922 state.ptr = state.start;
923
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300924 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300925 if (PyErr_Occurred())
926 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000927
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000928 if (status <= 0) {
929 if (status == 0)
930 break;
931 pattern_error(status);
932 goto error;
933 }
Tim Peters3d563502006-01-21 02:47:53 +0000934
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000935 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300936 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000937 break;
938 /* skip one character */
939 state.start = (void*) ((char*) state.ptr + state.charsize);
940 continue;
941 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000942
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000943 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300944 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000945 string, STATE_OFFSET(&state, last),
946 STATE_OFFSET(&state, state.start)
947 );
948 if (!item)
949 goto error;
950 status = PyList_Append(list, item);
951 Py_DECREF(item);
952 if (status < 0)
953 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000954
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000955 /* add groups (if any) */
956 for (i = 0; i < self->groups; i++) {
957 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000958 if (!item)
959 goto error;
960 status = PyList_Append(list, item);
961 Py_DECREF(item);
962 if (status < 0)
963 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000964 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000965
966 n = n + 1;
967
968 last = state.start = state.ptr;
969
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000970 }
971
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000972 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300973 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000974 string, STATE_OFFSET(&state, last), state.endpos
975 );
976 if (!item)
977 goto error;
978 status = PyList_Append(list, item);
979 Py_DECREF(item);
980 if (status < 0)
981 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000982
983 state_fini(&state);
984 return list;
985
986error:
987 Py_DECREF(list);
988 state_fini(&state);
989 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000990
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000991}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000992
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000993static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000994pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000995 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000996{
997 SRE_STATE state;
998 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300999 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001000 PyObject* item;
1001 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001002 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001003 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001004 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001005 Py_ssize_t n;
1006 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001007 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001008 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001009 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001010
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001011 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001012 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001013 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001014 Py_INCREF(filter);
1015 filter_is_callable = 1;
1016 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001017 /* if not callable, check if it's a literal string */
1018 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001019 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001020 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001021 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001022 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001023 if (charsize == 1)
1024 literal = memchr(ptr, '\\', n) == NULL;
1025 else
1026 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001027 } else {
1028 PyErr_Clear();
1029 literal = 0;
1030 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001031 if (view.buf)
1032 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001033 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001034 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001035 Py_INCREF(filter);
1036 filter_is_callable = 0;
1037 } else {
1038 /* not a literal; hand it over to the template compiler */
1039 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001040 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001041 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001042 );
1043 if (!filter)
1044 return NULL;
1045 filter_is_callable = PyCallable_Check(filter);
1046 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001047 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001048
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001049 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001050 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001051 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001052 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001053
1054 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001055 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001056 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001057 state_fini(&state);
1058 return NULL;
1059 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001060
1061 n = i = 0;
1062
1063 while (!count || n < count) {
1064
1065 state_reset(&state);
1066
1067 state.ptr = state.start;
1068
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001069 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001070 if (PyErr_Occurred())
1071 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001072
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001073 if (status <= 0) {
1074 if (status == 0)
1075 break;
1076 pattern_error(status);
1077 goto error;
1078 }
Tim Peters3d563502006-01-21 02:47:53 +00001079
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001080 b = STATE_OFFSET(&state, state.start);
1081 e = STATE_OFFSET(&state, state.ptr);
1082
1083 if (i < b) {
1084 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001085 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001086 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001087 if (!item)
1088 goto error;
1089 status = PyList_Append(list, item);
1090 Py_DECREF(item);
1091 if (status < 0)
1092 goto error;
1093
1094 } else if (i == b && i == e && n > 0)
1095 /* ignore empty match on latest position */
1096 goto next;
1097
1098 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001099 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001100 match = pattern_new_match(self, &state, 1);
1101 if (!match)
1102 goto error;
Victor Stinner7bfb42d2016-12-05 17:04:32 +01001103 item = PyObject_CallFunctionObjArgs(filter, match, NULL);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001104 Py_DECREF(match);
1105 if (!item)
1106 goto error;
1107 } else {
1108 /* filter is literal string */
1109 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001110 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001111 }
1112
1113 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001114 if (item != Py_None) {
1115 status = PyList_Append(list, item);
1116 Py_DECREF(item);
1117 if (status < 0)
1118 goto error;
1119 }
Tim Peters3d563502006-01-21 02:47:53 +00001120
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001121 i = e;
1122 n = n + 1;
1123
1124next:
1125 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001126 if (state.ptr == state.end)
1127 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001128 if (state.ptr == state.start)
1129 state.start = (void*) ((char*) state.ptr + state.charsize);
1130 else
1131 state.start = state.ptr;
1132
1133 }
1134
1135 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001136 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001137 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001138 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001139 if (!item)
1140 goto error;
1141 status = PyList_Append(list, item);
1142 Py_DECREF(item);
1143 if (status < 0)
1144 goto error;
1145 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001146
1147 state_fini(&state);
1148
Guido van Rossum4e173842001-12-07 04:25:10 +00001149 Py_DECREF(filter);
1150
Fredrik Lundhdac58492001-10-21 21:48:30 +00001151 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001152 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001153 if (!joiner) {
1154 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001155 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001156 }
1157 if (PyList_GET_SIZE(list) == 0) {
1158 Py_DECREF(list);
1159 item = joiner;
1160 }
1161 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001162 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001163 item = _PyBytes_Join(joiner, list);
1164 else
1165 item = PyUnicode_Join(joiner, list);
1166 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001167 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001168 if (!item)
1169 return NULL;
1170 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001171
1172 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001173 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001174
1175 return item;
1176
1177error:
1178 Py_DECREF(list);
1179 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001180 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001181 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001182
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001183}
1184
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001185/*[clinic input]
1186_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001187
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001188 repl: object
1189 string: object
1190 count: Py_ssize_t = 0
1191
1192Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1193[clinic start generated code]*/
1194
1195static PyObject *
1196_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1197 PyObject *string, Py_ssize_t count)
1198/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1199{
1200 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001201}
1202
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001203/*[clinic input]
1204_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001205
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001206 repl: object
1207 string: object
1208 count: Py_ssize_t = 0
1209
1210Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1211[clinic start generated code]*/
1212
1213static PyObject *
1214_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1215 PyObject *string, Py_ssize_t count)
1216/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1217{
1218 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001219}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001220
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001221/*[clinic input]
1222_sre.SRE_Pattern.__copy__
1223
1224[clinic start generated code]*/
1225
1226static PyObject *
1227_sre_SRE_Pattern___copy___impl(PatternObject *self)
1228/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001229{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001230#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001231 PatternObject* copy;
1232 int offset;
1233
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001234 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1235 if (!copy)
1236 return NULL;
1237
1238 offset = offsetof(PatternObject, groups);
1239
1240 Py_XINCREF(self->groupindex);
1241 Py_XINCREF(self->indexgroup);
1242 Py_XINCREF(self->pattern);
1243
1244 memcpy((char*) copy + offset, (char*) self + offset,
1245 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001246 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001247
1248 return (PyObject*) copy;
1249#else
1250 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1251 return NULL;
1252#endif
1253}
1254
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001255/*[clinic input]
1256_sre.SRE_Pattern.__deepcopy__
1257
1258 memo: object
1259
1260[clinic start generated code]*/
1261
1262static PyObject *
1263_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1264/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001265{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001266#ifdef USE_BUILTIN_COPY
1267 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001268
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001269 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001270 if (!copy)
1271 return NULL;
1272
1273 if (!deepcopy(&copy->groupindex, memo) ||
1274 !deepcopy(&copy->indexgroup, memo) ||
1275 !deepcopy(&copy->pattern, memo)) {
1276 Py_DECREF(copy);
1277 return NULL;
1278 }
1279
1280#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001281 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1282 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001283#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001284}
1285
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001286static PyObject *
1287pattern_repr(PatternObject *obj)
1288{
1289 static const struct {
1290 const char *name;
1291 int value;
1292 } flag_names[] = {
1293 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1294 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1295 {"re.LOCALE", SRE_FLAG_LOCALE},
1296 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1297 {"re.DOTALL", SRE_FLAG_DOTALL},
1298 {"re.UNICODE", SRE_FLAG_UNICODE},
1299 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1300 {"re.DEBUG", SRE_FLAG_DEBUG},
1301 {"re.ASCII", SRE_FLAG_ASCII},
1302 };
1303 PyObject *result = NULL;
1304 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001305 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001306 int flags = obj->flags;
1307
1308 /* Omit re.UNICODE for valid string patterns. */
1309 if (obj->isbytes == 0 &&
1310 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1311 SRE_FLAG_UNICODE)
1312 flags &= ~SRE_FLAG_UNICODE;
1313
1314 flag_items = PyList_New(0);
1315 if (!flag_items)
1316 return NULL;
1317
1318 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1319 if (flags & flag_names[i].value) {
1320 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1321 if (!item)
1322 goto done;
1323
1324 if (PyList_Append(flag_items, item) < 0) {
1325 Py_DECREF(item);
1326 goto done;
1327 }
1328 Py_DECREF(item);
1329 flags &= ~flag_names[i].value;
1330 }
1331 }
1332 if (flags) {
1333 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1334 if (!item)
1335 goto done;
1336
1337 if (PyList_Append(flag_items, item) < 0) {
1338 Py_DECREF(item);
1339 goto done;
1340 }
1341 Py_DECREF(item);
1342 }
1343
1344 if (PyList_Size(flag_items) > 0) {
1345 PyObject *flags_result;
1346 PyObject *sep = PyUnicode_FromString("|");
1347 if (!sep)
1348 goto done;
1349 flags_result = PyUnicode_Join(sep, flag_items);
1350 Py_DECREF(sep);
1351 if (!flags_result)
1352 goto done;
1353 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1354 obj->pattern, flags_result);
1355 Py_DECREF(flags_result);
1356 }
1357 else {
1358 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1359 }
1360
1361done:
1362 Py_DECREF(flag_items);
1363 return result;
1364}
1365
Raymond Hettinger94478742004-09-24 04:31:19 +00001366PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1367
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001368/* PatternObject's 'groupindex' method. */
1369static PyObject *
1370pattern_groupindex(PatternObject *self)
1371{
1372 return PyDictProxy_New(self->groupindex);
1373}
1374
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001375static int _validate(PatternObject *self); /* Forward */
1376
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001377/*[clinic input]
1378_sre.compile
1379
1380 pattern: object
1381 flags: int
1382 code: object(subclass_of='&PyList_Type')
1383 groups: Py_ssize_t
Victor Stinner726a57d2016-11-22 23:04:39 +01001384 groupindex: object(subclass_of='&PyDict_Type')
1385 indexgroup: object(subclass_of='&PyTuple_Type')
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001386
1387[clinic start generated code]*/
1388
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001389static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001390_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001391 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1392 PyObject *indexgroup)
Victor Stinner726a57d2016-11-22 23:04:39 +01001393/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001394{
1395 /* "compile" pattern descriptor to pattern object */
1396
1397 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001398 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001399
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001400 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001401 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001402 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1403 if (!self)
1404 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001405 self->weakreflist = NULL;
1406 self->pattern = NULL;
1407 self->groupindex = NULL;
1408 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001409
1410 self->codesize = n;
1411
1412 for (i = 0; i < n; i++) {
1413 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001414 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001415 self->code[i] = (SRE_CODE) value;
1416 if ((unsigned long) self->code[i] != value) {
1417 PyErr_SetString(PyExc_OverflowError,
1418 "regular expression code size limit exceeded");
1419 break;
1420 }
1421 }
1422
1423 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001424 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001425 return NULL;
1426 }
1427
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001428 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001429 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001430 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001431 else {
1432 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001433 int charsize;
1434 Py_buffer view;
1435 view.buf = NULL;
1436 if (!getstring(pattern, &p_length, &self->isbytes,
1437 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001438 Py_DECREF(self);
1439 return NULL;
1440 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001441 if (view.buf)
1442 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001443 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001444
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001445 Py_INCREF(pattern);
1446 self->pattern = pattern;
1447
1448 self->flags = flags;
1449
1450 self->groups = groups;
1451
Victor Stinnerb44fb122016-11-21 16:35:08 +01001452 Py_INCREF(groupindex);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001453 self->groupindex = groupindex;
1454
Victor Stinnerb44fb122016-11-21 16:35:08 +01001455 Py_INCREF(indexgroup);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001456 self->indexgroup = indexgroup;
1457
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001458 if (!_validate(self)) {
1459 Py_DECREF(self);
1460 return NULL;
1461 }
1462
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001463 return (PyObject*) self;
1464}
1465
Guido van Rossumb700df92000-03-31 14:59:30 +00001466/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001467/* Code validation */
1468
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
1472
1473 The nice thing about the generated code is that it is position-independent:
1474 all jumps are relative jumps forward. Also, jumps don't cross each other:
1475 the target of a later jump is always earlier than the target of an earlier
1476 jump. IOW, this is okay:
1477
1478 J---------J-------T--------T
1479 \ \_____/ /
1480 \______________________/
1481
1482 but this is not:
1483
1484 J---------J-------T--------T
1485 \_________\_____/ /
1486 \____________/
1487
Serhiy Storchakaefa5a392013-10-27 08:04:58 +02001488 It also helps that SRE_CODE is always an unsigned type.
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001489*/
1490
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the enclosing function. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros expect the enclosing function to declare `code` and `end`
   (SRE_CODE pointers) plus `op`, `arg` or `skip` respectively.  Each one
   FAILs if `code` has already reached `end`, otherwise reads *code and
   advances `code` by one. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Like GET_ARG, but stores into `skip` and additionally FAILs when
   skip - adj exceeds the number of code words left before `end` (counted
   from the skip word itself).  `adj` is parenthesized so that a compound
   expression may be passed safely. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001531
1532static int
1533_validate_charset(SRE_CODE *code, SRE_CODE *end)
1534{
1535 /* Some variables are manipulated by the macros above */
1536 SRE_CODE op;
1537 SRE_CODE arg;
1538 SRE_CODE offset;
1539 int i;
1540
1541 while (code < end) {
1542 GET_OP;
1543 switch (op) {
1544
1545 case SRE_OP_NEGATE:
1546 break;
1547
1548 case SRE_OP_LITERAL:
1549 GET_ARG;
1550 break;
1551
1552 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001553 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001554 GET_ARG;
1555 GET_ARG;
1556 break;
1557
1558 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001559 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001560 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001561 FAIL;
1562 code += offset;
1563 break;
1564
1565 case SRE_OP_BIGCHARSET:
1566 GET_ARG; /* Number of blocks */
1567 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001568 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001569 FAIL;
1570 /* Make sure that each byte points to a valid block */
1571 for (i = 0; i < 256; i++) {
1572 if (((unsigned char *)code)[i] >= arg)
1573 FAIL;
1574 }
1575 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001576 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001577 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001578 FAIL;
1579 code += offset;
1580 break;
1581
1582 case SRE_OP_CATEGORY:
1583 GET_ARG;
1584 switch (arg) {
1585 case SRE_CATEGORY_DIGIT:
1586 case SRE_CATEGORY_NOT_DIGIT:
1587 case SRE_CATEGORY_SPACE:
1588 case SRE_CATEGORY_NOT_SPACE:
1589 case SRE_CATEGORY_WORD:
1590 case SRE_CATEGORY_NOT_WORD:
1591 case SRE_CATEGORY_LINEBREAK:
1592 case SRE_CATEGORY_NOT_LINEBREAK:
1593 case SRE_CATEGORY_LOC_WORD:
1594 case SRE_CATEGORY_LOC_NOT_WORD:
1595 case SRE_CATEGORY_UNI_DIGIT:
1596 case SRE_CATEGORY_UNI_NOT_DIGIT:
1597 case SRE_CATEGORY_UNI_SPACE:
1598 case SRE_CATEGORY_UNI_NOT_SPACE:
1599 case SRE_CATEGORY_UNI_WORD:
1600 case SRE_CATEGORY_UNI_NOT_WORD:
1601 case SRE_CATEGORY_UNI_LINEBREAK:
1602 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1603 break;
1604 default:
1605 FAIL;
1606 }
1607 break;
1608
1609 default:
1610 FAIL;
1611
1612 }
1613 }
1614
1615 return 1;
1616}
1617
1618static int
1619_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1620{
1621 /* Some variables are manipulated by the macros above */
1622 SRE_CODE op;
1623 SRE_CODE arg;
1624 SRE_CODE skip;
1625
1626 VTRACE(("code=%p, end=%p\n", code, end));
1627
1628 if (code > end)
1629 FAIL;
1630
1631 while (code < end) {
1632 GET_OP;
1633 switch (op) {
1634
1635 case SRE_OP_MARK:
1636 /* We don't check whether marks are properly nested; the
1637 sre_match() code is robust even if they don't, and the worst
1638 you can get is nonsensical match results. */
1639 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001640 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001641 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1642 FAIL;
1643 }
1644 break;
1645
1646 case SRE_OP_LITERAL:
1647 case SRE_OP_NOT_LITERAL:
1648 case SRE_OP_LITERAL_IGNORE:
1649 case SRE_OP_NOT_LITERAL_IGNORE:
1650 GET_ARG;
1651 /* The arg is just a character, nothing to check */
1652 break;
1653
1654 case SRE_OP_SUCCESS:
1655 case SRE_OP_FAILURE:
1656 /* Nothing to check; these normally end the matching process */
1657 break;
1658
1659 case SRE_OP_AT:
1660 GET_ARG;
1661 switch (arg) {
1662 case SRE_AT_BEGINNING:
1663 case SRE_AT_BEGINNING_STRING:
1664 case SRE_AT_BEGINNING_LINE:
1665 case SRE_AT_END:
1666 case SRE_AT_END_LINE:
1667 case SRE_AT_END_STRING:
1668 case SRE_AT_BOUNDARY:
1669 case SRE_AT_NON_BOUNDARY:
1670 case SRE_AT_LOC_BOUNDARY:
1671 case SRE_AT_LOC_NON_BOUNDARY:
1672 case SRE_AT_UNI_BOUNDARY:
1673 case SRE_AT_UNI_NON_BOUNDARY:
1674 break;
1675 default:
1676 FAIL;
1677 }
1678 break;
1679
1680 case SRE_OP_ANY:
1681 case SRE_OP_ANY_ALL:
1682 /* These have no operands */
1683 break;
1684
1685 case SRE_OP_IN:
1686 case SRE_OP_IN_IGNORE:
1687 GET_SKIP;
1688 /* Stop 1 before the end; we check the FAILURE below */
1689 if (!_validate_charset(code, code+skip-2))
1690 FAIL;
1691 if (code[skip-2] != SRE_OP_FAILURE)
1692 FAIL;
1693 code += skip-1;
1694 break;
1695
1696 case SRE_OP_INFO:
1697 {
1698 /* A minimal info field is
1699 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1700 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1701 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001702 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001703 SRE_CODE *newcode;
1704 GET_SKIP;
1705 newcode = code+skip-1;
1706 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001707 GET_ARG;
1708 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001709 /* Check that only valid flags are present */
1710 if ((flags & ~(SRE_INFO_PREFIX |
1711 SRE_INFO_LITERAL |
1712 SRE_INFO_CHARSET)) != 0)
1713 FAIL;
1714 /* PREFIX and CHARSET are mutually exclusive */
1715 if ((flags & SRE_INFO_PREFIX) &&
1716 (flags & SRE_INFO_CHARSET))
1717 FAIL;
1718 /* LITERAL implies PREFIX */
1719 if ((flags & SRE_INFO_LITERAL) &&
1720 !(flags & SRE_INFO_PREFIX))
1721 FAIL;
1722 /* Validate the prefix */
1723 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001724 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001725 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001726 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001727 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001728 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001729 FAIL;
1730 code += prefix_len;
1731 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001732 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001733 FAIL;
1734 /* Each overlap value should be < prefix_len */
1735 for (i = 0; i < prefix_len; i++) {
1736 if (code[i] >= prefix_len)
1737 FAIL;
1738 }
1739 code += prefix_len;
1740 }
1741 /* Validate the charset */
1742 if (flags & SRE_INFO_CHARSET) {
1743 if (!_validate_charset(code, newcode-1))
1744 FAIL;
1745 if (newcode[-1] != SRE_OP_FAILURE)
1746 FAIL;
1747 code = newcode;
1748 }
1749 else if (code != newcode) {
1750 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1751 FAIL;
1752 }
1753 }
1754 break;
1755
1756 case SRE_OP_BRANCH:
1757 {
1758 SRE_CODE *target = NULL;
1759 for (;;) {
1760 GET_SKIP;
1761 if (skip == 0)
1762 break;
1763 /* Stop 2 before the end; we check the JUMP below */
1764 if (!_validate_inner(code, code+skip-3, groups))
1765 FAIL;
1766 code += skip-3;
1767 /* Check that it ends with a JUMP, and that each JUMP
1768 has the same target */
1769 GET_OP;
1770 if (op != SRE_OP_JUMP)
1771 FAIL;
1772 GET_SKIP;
1773 if (target == NULL)
1774 target = code+skip-1;
1775 else if (code+skip-1 != target)
1776 FAIL;
1777 }
1778 }
1779 break;
1780
1781 case SRE_OP_REPEAT_ONE:
1782 case SRE_OP_MIN_REPEAT_ONE:
1783 {
1784 SRE_CODE min, max;
1785 GET_SKIP;
1786 GET_ARG; min = arg;
1787 GET_ARG; max = arg;
1788 if (min > max)
1789 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001790 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001791 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001792 if (!_validate_inner(code, code+skip-4, groups))
1793 FAIL;
1794 code += skip-4;
1795 GET_OP;
1796 if (op != SRE_OP_SUCCESS)
1797 FAIL;
1798 }
1799 break;
1800
1801 case SRE_OP_REPEAT:
1802 {
1803 SRE_CODE min, max;
1804 GET_SKIP;
1805 GET_ARG; min = arg;
1806 GET_ARG; max = arg;
1807 if (min > max)
1808 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001809 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001810 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001811 if (!_validate_inner(code, code+skip-3, groups))
1812 FAIL;
1813 code += skip-3;
1814 GET_OP;
1815 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1816 FAIL;
1817 }
1818 break;
1819
1820 case SRE_OP_GROUPREF:
1821 case SRE_OP_GROUPREF_IGNORE:
1822 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001823 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001824 FAIL;
1825 break;
1826
1827 case SRE_OP_GROUPREF_EXISTS:
1828 /* The regex syntax for this is: '(?(group)then|else)', where
1829 'group' is either an integer group number or a group name,
1830 'then' and 'else' are sub-regexes, and 'else' is optional. */
1831 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001832 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001833 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001834 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001835 code--; /* The skip is relative to the first arg! */
1836 /* There are two possibilities here: if there is both a 'then'
1837 part and an 'else' part, the generated code looks like:
1838
1839 GROUPREF_EXISTS
1840 <group>
1841 <skipyes>
1842 ...then part...
1843 JUMP
1844 <skipno>
1845 (<skipyes> jumps here)
1846 ...else part...
1847 (<skipno> jumps here)
1848
1849 If there is only a 'then' part, it looks like:
1850
1851 GROUPREF_EXISTS
1852 <group>
1853 <skip>
1854 ...then part...
1855 (<skip> jumps here)
1856
1857 There is no direct way to decide which it is, and we don't want
1858 to allow arbitrary jumps anywhere in the code; so we just look
1859 for a JUMP opcode preceding our skip target.
1860 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001861 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001862 code[skip-3] == SRE_OP_JUMP)
1863 {
1864 VTRACE(("both then and else parts present\n"));
1865 if (!_validate_inner(code+1, code+skip-3, groups))
1866 FAIL;
1867 code += skip-2; /* Position after JUMP, at <skipno> */
1868 GET_SKIP;
1869 if (!_validate_inner(code, code+skip-1, groups))
1870 FAIL;
1871 code += skip-1;
1872 }
1873 else {
1874 VTRACE(("only a then part present\n"));
1875 if (!_validate_inner(code+1, code+skip-1, groups))
1876 FAIL;
1877 code += skip-1;
1878 }
1879 break;
1880
1881 case SRE_OP_ASSERT:
1882 case SRE_OP_ASSERT_NOT:
1883 GET_SKIP;
1884 GET_ARG; /* 0 for lookahead, width for lookbehind */
1885 code--; /* Back up over arg to simplify math below */
1886 if (arg & 0x80000000)
1887 FAIL; /* Width too large */
1888 /* Stop 1 before the end; we check the SUCCESS below */
1889 if (!_validate_inner(code+1, code+skip-2, groups))
1890 FAIL;
1891 code += skip-2;
1892 GET_OP;
1893 if (op != SRE_OP_SUCCESS)
1894 FAIL;
1895 break;
1896
1897 default:
1898 FAIL;
1899
1900 }
1901 }
1902
1903 VTRACE(("okay\n"));
1904 return 1;
1905}
1906
1907static int
1908_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1909{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001910 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1911 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001912 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001913 return _validate_inner(code, end-1, groups);
1914}
1915
1916static int
1917_validate(PatternObject *self)
1918{
1919 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1920 {
1921 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1922 return 0;
1923 }
1924 else
1925 VTRACE(("Success!\n"));
1926 return 1;
1927}
1928
1929/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001930/* match methods */
1931
/* Deallocator for match objects: drops the references held in the regs,
   string and pattern fields, then frees the object itself. */
static void
match_dealloc(MatchObject* self)
{
    /* regs and string are released with Py_XDECREF, so either may be NULL */
    Py_XDECREF(self->regs);
    Py_XDECREF(self->string);
    /* pattern is released with Py_DECREF, i.e. it must not be NULL here */
    Py_DECREF(self->pattern);
    PyObject_DEL(self);
}
1940
1941static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001942match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001943{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001944 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001945 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001946 Py_buffer view;
1947 PyObject *result;
1948 void* ptr;
1949
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001950 if (index < 0 || index >= self->groups) {
1951 /* raise IndexError if we were given a bad group number */
1952 PyErr_SetString(
1953 PyExc_IndexError,
1954 "no such group"
1955 );
1956 return NULL;
1957 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001958
Fredrik Lundh6f013982000-07-03 18:44:21 +00001959 index *= 2;
1960
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001961 if (self->string == Py_None || self->mark[index] < 0) {
1962 /* return default value if the string or group is undefined */
1963 Py_INCREF(def);
1964 return def;
1965 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001966
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001967 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001968 if (ptr == NULL)
1969 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001970 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001971 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001972 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001973 PyBuffer_Release(&view);
1974 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001975}
1976
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001977static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001978match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001979{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001980 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001981
Guido van Rossumddefaf32007-01-14 03:31:43 +00001982 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001983 /* Default value */
1984 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001985
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001986 if (PyIndex_Check(index)) {
1987 return PyNumber_AsSsize_t(index, NULL);
1988 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001989
Fredrik Lundh6f013982000-07-03 18:44:21 +00001990 i = -1;
1991
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001992 if (self->pattern->groupindex) {
1993 index = PyObject_GetItem(self->pattern->groupindex, index);
1994 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00001995 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00001996 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001997 Py_DECREF(index);
1998 } else
1999 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002000 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002001
2002 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002003}
2004
2005static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002006match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002007{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002008 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002009}
2010
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002011/*[clinic input]
2012_sre.SRE_Match.expand
2013
2014 template: object
2015
2016Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2017[clinic start generated code]*/
2018
2019static PyObject *
2020_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2021/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002022{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002023 /* delegate to Python code */
2024 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002025 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002026 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002027 );
2028}
2029
2030static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002031match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002032{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002033 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002034 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002035
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002036 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002037
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002038 switch (size) {
2039 case 0:
2040 result = match_getslice(self, Py_False, Py_None);
2041 break;
2042 case 1:
2043 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2044 break;
2045 default:
2046 /* fetch multiple items */
2047 result = PyTuple_New(size);
2048 if (!result)
2049 return NULL;
2050 for (i = 0; i < size; i++) {
2051 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002052 self, PyTuple_GET_ITEM(args, i), Py_None
2053 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002054 if (!item) {
2055 Py_DECREF(result);
2056 return NULL;
2057 }
2058 PyTuple_SET_ITEM(result, i, item);
2059 }
2060 break;
2061 }
2062 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002063}
2064
Eric V. Smith605bdae2016-09-11 08:55:43 -04002065static PyObject*
2066match_getitem(MatchObject* self, PyObject* name)
2067{
2068 return match_getslice(self, name, Py_None);
2069}
2070
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002071/*[clinic input]
2072_sre.SRE_Match.groups
2073
2074 default: object = None
2075 Is used for groups that did not participate in the match.
2076
2077Return a tuple containing all the subgroups of the match, from 1.
2078[clinic start generated code]*/
2079
2080static PyObject *
2081_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2082/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002083{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002084 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002085 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002086
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002087 result = PyTuple_New(self->groups-1);
2088 if (!result)
2089 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002090
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002091 for (index = 1; index < self->groups; index++) {
2092 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002093 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002094 if (!item) {
2095 Py_DECREF(result);
2096 return NULL;
2097 }
2098 PyTuple_SET_ITEM(result, index-1, item);
2099 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002100
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002101 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002102}
2103
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002104/*[clinic input]
2105_sre.SRE_Match.groupdict
2106
2107 default: object = None
2108 Is used for groups that did not participate in the match.
2109
2110Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2111[clinic start generated code]*/
2112
2113static PyObject *
2114_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2115/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002116{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002117 PyObject* result;
2118 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002119 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002120
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002121 result = PyDict_New();
2122 if (!result || !self->pattern->groupindex)
2123 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002124
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002125 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002126 if (!keys)
2127 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002128
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002129 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002130 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002131 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002132 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002133 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002134 if (!key)
2135 goto failed;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002136 value = match_getslice(self, key, default_value);
Benjamin Peterson3a27b082016-08-15 22:01:41 -07002137 if (!value)
Fredrik Lundh770617b2001-01-14 15:06:11 +00002138 goto failed;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002139 status = PyDict_SetItem(result, key, value);
2140 Py_DECREF(value);
2141 if (status < 0)
2142 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002143 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002144
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002145 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002146
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002147 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002148
2149failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002150 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002151 Py_DECREF(result);
2152 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002153}
2154
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002155/*[clinic input]
2156_sre.SRE_Match.start -> Py_ssize_t
2157
2158 group: object(c_default="NULL") = 0
2159 /
2160
2161Return index of the start of the substring matched by group.
2162[clinic start generated code]*/
2163
2164static Py_ssize_t
2165_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2166/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002167{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002168 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002169
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002170 if (index < 0 || index >= self->groups) {
2171 PyErr_SetString(
2172 PyExc_IndexError,
2173 "no such group"
2174 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002175 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002176 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002177
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002178 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002179 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002180}
2181
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002182/*[clinic input]
2183_sre.SRE_Match.end -> Py_ssize_t
2184
2185 group: object(c_default="NULL") = 0
2186 /
2187
2188Return index of the end of the substring matched by group.
2189[clinic start generated code]*/
2190
2191static Py_ssize_t
2192_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2193/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002194{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002195 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002196
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002197 if (index < 0 || index >= self->groups) {
2198 PyErr_SetString(
2199 PyExc_IndexError,
2200 "no such group"
2201 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002202 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002203 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002204
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002205 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002206 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207}
2208
2209LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002210_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002211{
2212 PyObject* pair;
2213 PyObject* item;
2214
2215 pair = PyTuple_New(2);
2216 if (!pair)
2217 return NULL;
2218
Christian Heimes217cfd12007-12-02 14:31:20 +00002219 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002220 if (!item)
2221 goto error;
2222 PyTuple_SET_ITEM(pair, 0, item);
2223
Christian Heimes217cfd12007-12-02 14:31:20 +00002224 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002225 if (!item)
2226 goto error;
2227 PyTuple_SET_ITEM(pair, 1, item);
2228
2229 return pair;
2230
2231 error:
2232 Py_DECREF(pair);
2233 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002234}
2235
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002236/*[clinic input]
2237_sre.SRE_Match.span
2238
2239 group: object(c_default="NULL") = 0
2240 /
2241
2242For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2243[clinic start generated code]*/
2244
2245static PyObject *
2246_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2247/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002248{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002249 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002250
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002251 if (index < 0 || index >= self->groups) {
2252 PyErr_SetString(
2253 PyExc_IndexError,
2254 "no such group"
2255 );
2256 return NULL;
2257 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002258
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002259 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002260 return _pair(self->mark[index*2], self->mark[index*2+1]);
2261}
2262
2263static PyObject*
2264match_regs(MatchObject* self)
2265{
2266 PyObject* regs;
2267 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002268 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002269
2270 regs = PyTuple_New(self->groups);
2271 if (!regs)
2272 return NULL;
2273
2274 for (index = 0; index < self->groups; index++) {
2275 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2276 if (!item) {
2277 Py_DECREF(regs);
2278 return NULL;
2279 }
2280 PyTuple_SET_ITEM(regs, index, item);
2281 }
2282
2283 Py_INCREF(regs);
2284 self->regs = regs;
2285
2286 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002287}
2288
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002289/*[clinic input]
2290_sre.SRE_Match.__copy__
2291
2292[clinic start generated code]*/
2293
2294static PyObject *
2295_sre_SRE_Match___copy___impl(MatchObject *self)
2296/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002297{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002298#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002299 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002300 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002301
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002302 slots = 2 * (self->pattern->groups+1);
2303
2304 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2305 if (!copy)
2306 return NULL;
2307
2308 /* this value a constant, but any compiler should be able to
2309 figure that out all by itself */
2310 offset = offsetof(MatchObject, string);
2311
2312 Py_XINCREF(self->pattern);
2313 Py_XINCREF(self->string);
2314 Py_XINCREF(self->regs);
2315
2316 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002317 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002318
2319 return (PyObject*) copy;
2320#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002321 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002322 return NULL;
2323#endif
2324}
2325
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002326/*[clinic input]
2327_sre.SRE_Match.__deepcopy__
2328
2329 memo: object
2330
2331[clinic start generated code]*/
2332
2333static PyObject *
2334_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2335/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002336{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002337#ifdef USE_BUILTIN_COPY
2338 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002339
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002340 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002341 if (!copy)
2342 return NULL;
2343
2344 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2345 !deepcopy(&copy->string, memo) ||
2346 !deepcopy(&copy->regs, memo)) {
2347 Py_DECREF(copy);
2348 return NULL;
2349 }
2350
2351#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002352 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2353 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002354#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002355}
2356
/* Docstring for the match type; it is installed as tp_doc of Match_Type. */
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002357PyDoc_STRVAR(match_doc,
2358"The result of re.match() and re.search().\n\
2359Match objects always have a boolean value of True.");
2360
/* Docstring for the "group" entry of match_methods. */
2361PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002362"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002363 Return subgroup(s) of the match by indices or names.\n\
2364 For 0 returns the entire match.");
2365
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002366static PyObject *
2367match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002368{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002369 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002370 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002371 Py_INCREF(Py_None);
2372 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002373}
2374
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002375static PyObject *
2376match_lastgroup_get(MatchObject *self)
2377{
2378 if (self->pattern->indexgroup && self->lastindex >= 0) {
2379 PyObject* result = PySequence_GetItem(
2380 self->pattern->indexgroup, self->lastindex
2381 );
2382 if (result)
2383 return result;
2384 PyErr_Clear();
2385 }
2386 Py_INCREF(Py_None);
2387 return Py_None;
2388}
2389
2390static PyObject *
2391match_regs_get(MatchObject *self)
2392{
2393 if (self->regs) {
2394 Py_INCREF(self->regs);
2395 return self->regs;
2396 } else
2397 return match_regs(self);
2398}
2399
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002400static PyObject *
2401match_repr(MatchObject *self)
2402{
2403 PyObject *result;
2404 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2405 if (group0 == NULL)
2406 return NULL;
2407 result = PyUnicode_FromFormat(
2408 "<%s object; span=(%d, %d), match=%.50R>",
2409 Py_TYPE(self)->tp_name,
2410 self->mark[0], self->mark[1], group0);
2411 Py_DECREF(group0);
2412 return result;
2413}
2414
2415
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002416static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002417pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002418{
2419 /* create match object (from state object) */
2420
2421 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002422 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002423 char* base;
2424 int n;
2425
2426 if (status > 0) {
2427
2428 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002429 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002430 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2431 2*(pattern->groups+1));
2432 if (!match)
2433 return NULL;
2434
2435 Py_INCREF(pattern);
2436 match->pattern = pattern;
2437
2438 Py_INCREF(state->string);
2439 match->string = state->string;
2440
2441 match->regs = NULL;
2442 match->groups = pattern->groups+1;
2443
2444 /* fill in group slices */
2445
2446 base = (char*) state->beginning;
2447 n = state->charsize;
2448
2449 match->mark[0] = ((char*) state->start - base) / n;
2450 match->mark[1] = ((char*) state->ptr - base) / n;
2451
2452 for (i = j = 0; i < pattern->groups; i++, j+=2)
2453 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2454 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2455 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2456 } else
2457 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2458
2459 match->pos = state->pos;
2460 match->endpos = state->endpos;
2461
2462 match->lastindex = state->lastindex;
2463
2464 return (PyObject*) match;
2465
2466 } else if (status == 0) {
2467
2468 /* no match */
2469 Py_INCREF(Py_None);
2470 return Py_None;
2471
2472 }
2473
2474 /* internal error */
2475 pattern_error(status);
2476 return NULL;
2477}
2478
2479
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002480/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002481/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002482
2483static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002484scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002485{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002486 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002487 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002488 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002489}
2490
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002491/*[clinic input]
2492_sre.SRE_Scanner.match
2493
2494[clinic start generated code]*/
2495
2496static PyObject *
2497_sre_SRE_Scanner_match_impl(ScannerObject *self)
2498/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002499{
2500 SRE_STATE* state = &self->state;
2501 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002502 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002503
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002504 if (state->start == NULL)
2505 Py_RETURN_NONE;
2506
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002507 state_reset(state);
2508
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002509 state->ptr = state->start;
2510
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002511 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002512 if (PyErr_Occurred())
2513 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002514
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002515 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002516 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002517
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002518 if (status == 0)
2519 state->start = NULL;
2520 else if (state->ptr != state->start)
2521 state->start = state->ptr;
2522 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002523 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002524 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002525 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002526
2527 return match;
2528}
2529
2530
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002531/*[clinic input]
2532_sre.SRE_Scanner.search
2533
2534[clinic start generated code]*/
2535
2536static PyObject *
2537_sre_SRE_Scanner_search_impl(ScannerObject *self)
2538/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002539{
2540 SRE_STATE* state = &self->state;
2541 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002542 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002543
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002544 if (state->start == NULL)
2545 Py_RETURN_NONE;
2546
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002547 state_reset(state);
2548
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002549 state->ptr = state->start;
2550
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002551 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002552 if (PyErr_Occurred())
2553 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002554
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002555 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002556 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002557
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002558 if (status == 0)
2559 state->start = NULL;
2560 else if (state->ptr != state->start)
2561 state->start = state->ptr;
2562 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002563 state->start = (void*) ((char*) state->ptr + state->charsize);
2564 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002565 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002566
2567 return match;
2568}
2569
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002570static PyObject *
2571pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002572{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002573 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002574
2575 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002576 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2577 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002578 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002579 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002580
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002581 /* create search state object */
2582 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2583 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002584 return NULL;
2585 }
2586
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002587 Py_INCREF(self);
2588 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002589
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002590 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002591}
2592
Victor Stinnerb44fb122016-11-21 16:35:08 +01002593static Py_hash_t
2594pattern_hash(PatternObject *self)
2595{
2596 Py_hash_t hash, hash2;
2597
2598 hash = PyObject_Hash(self->pattern);
2599 if (hash == -1) {
2600 return -1;
2601 }
2602
2603 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2604 hash ^= hash2;
2605
2606 hash ^= self->flags;
2607 hash ^= self->isbytes;
2608 hash ^= self->codesize;
2609
2610 if (hash == -1) {
2611 hash = -2;
2612 }
2613 return hash;
2614}
2615
2616static PyObject*
2617pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2618{
2619 PatternObject *left, *right;
2620 int cmp;
2621
2622 if (op != Py_EQ && op != Py_NE) {
2623 Py_RETURN_NOTIMPLEMENTED;
2624 }
2625
2626 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2627 Py_RETURN_NOTIMPLEMENTED;
2628 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002629
2630 if (lefto == righto) {
2631 /* a pattern is equal to itself */
2632 return PyBool_FromLong(op == Py_EQ);
2633 }
2634
Victor Stinnerb44fb122016-11-21 16:35:08 +01002635 left = (PatternObject *)lefto;
2636 right = (PatternObject *)righto;
2637
2638 cmp = (left->flags == right->flags
2639 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002640 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002641 if (cmp) {
2642 /* Compare the code and the pattern because the same pattern can
2643 produce different codes depending on the locale used to compile the
2644 pattern when the re.LOCALE flag is used. Don't compare groups,
2645 indexgroup nor groupindex: they are derivated from the pattern. */
2646 cmp = (memcmp(left->code, right->code,
2647 sizeof(left->code[0]) * left->codesize) == 0);
2648 }
2649 if (cmp) {
2650 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2651 Py_EQ);
2652 if (cmp < 0) {
2653 return NULL;
2654 }
2655 }
2656 if (op == Py_NE) {
2657 cmp = !cmp;
2658 }
2659 return PyBool_FromLong(cmp);
2660}
2661
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002662#include "clinic/_sre.c.h"
2663
2664static PyMethodDef pattern_methods[] = {
2665 _SRE_SRE_PATTERN_MATCH_METHODDEF
2666 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2667 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2668 _SRE_SRE_PATTERN_SUB_METHODDEF
2669 _SRE_SRE_PATTERN_SUBN_METHODDEF
2670 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2671 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2672 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2673 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2674 _SRE_SRE_PATTERN___COPY___METHODDEF
2675 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2676 {NULL, NULL}
2677};
2678
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002679static PyGetSetDef pattern_getset[] = {
2680 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2681 "A dictionary mapping group names to group numbers."},
2682 {NULL} /* Sentinel */
2683};
2684
2685#define PAT_OFF(x) offsetof(PatternObject, x)
2686static PyMemberDef pattern_members[] = {
2687 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2688 {"flags", T_INT, PAT_OFF(flags), READONLY},
2689 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2690 {NULL} /* Sentinel */
2691};
2692
2693static PyTypeObject Pattern_Type = {
2694 PyVarObject_HEAD_INIT(NULL, 0)
2695 "_" SRE_MODULE ".SRE_Pattern",
2696 sizeof(PatternObject), sizeof(SRE_CODE),
2697 (destructor)pattern_dealloc, /* tp_dealloc */
2698 0, /* tp_print */
2699 0, /* tp_getattr */
2700 0, /* tp_setattr */
2701 0, /* tp_reserved */
2702 (reprfunc)pattern_repr, /* tp_repr */
2703 0, /* tp_as_number */
2704 0, /* tp_as_sequence */
2705 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002706 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002707 0, /* tp_call */
2708 0, /* tp_str */
2709 0, /* tp_getattro */
2710 0, /* tp_setattro */
2711 0, /* tp_as_buffer */
2712 Py_TPFLAGS_DEFAULT, /* tp_flags */
2713 pattern_doc, /* tp_doc */
2714 0, /* tp_traverse */
2715 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002716 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002717 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2718 0, /* tp_iter */
2719 0, /* tp_iternext */
2720 pattern_methods, /* tp_methods */
2721 pattern_members, /* tp_members */
2722 pattern_getset, /* tp_getset */
2723};
2724
Eric V. Smith605bdae2016-09-11 08:55:43 -04002725/* Match objects do not support length or assignment, but do support
2726 __getitem__. */
2727static PyMappingMethods match_as_mapping = {
2728 NULL,
2729 (binaryfunc)match_getitem,
2730 NULL
2731};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002732
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002733static PyMethodDef match_methods[] = {
2734 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2735 _SRE_SRE_MATCH_START_METHODDEF
2736 _SRE_SRE_MATCH_END_METHODDEF
2737 _SRE_SRE_MATCH_SPAN_METHODDEF
2738 _SRE_SRE_MATCH_GROUPS_METHODDEF
2739 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2740 _SRE_SRE_MATCH_EXPAND_METHODDEF
2741 _SRE_SRE_MATCH___COPY___METHODDEF
2742 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2743 {NULL, NULL}
2744};
2745
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002746static PyGetSetDef match_getset[] = {
2747 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2748 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2749 {"regs", (getter)match_regs_get, (setter)NULL},
2750 {NULL}
2751};
2752
2753#define MATCH_OFF(x) offsetof(MatchObject, x)
2754static PyMemberDef match_members[] = {
2755 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2756 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2757 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2758 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2759 {NULL}
2760};
2761
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2764
2765static PyTypeObject Match_Type = {
2766 PyVarObject_HEAD_INIT(NULL,0)
2767 "_" SRE_MODULE ".SRE_Match",
2768 sizeof(MatchObject), sizeof(Py_ssize_t),
2769 (destructor)match_dealloc, /* tp_dealloc */
2770 0, /* tp_print */
2771 0, /* tp_getattr */
2772 0, /* tp_setattr */
2773 0, /* tp_reserved */
2774 (reprfunc)match_repr, /* tp_repr */
2775 0, /* tp_as_number */
2776 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002777 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002778 0, /* tp_hash */
2779 0, /* tp_call */
2780 0, /* tp_str */
2781 0, /* tp_getattro */
2782 0, /* tp_setattro */
2783 0, /* tp_as_buffer */
2784 Py_TPFLAGS_DEFAULT, /* tp_flags */
2785 match_doc, /* tp_doc */
2786 0, /* tp_traverse */
2787 0, /* tp_clear */
2788 0, /* tp_richcompare */
2789 0, /* tp_weaklistoffset */
2790 0, /* tp_iter */
2791 0, /* tp_iternext */
2792 match_methods, /* tp_methods */
2793 match_members, /* tp_members */
2794 match_getset, /* tp_getset */
2795};
2796
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002797static PyMethodDef scanner_methods[] = {
2798 _SRE_SRE_SCANNER_MATCH_METHODDEF
2799 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2800 {NULL, NULL}
2801};
2802
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002803#define SCAN_OFF(x) offsetof(ScannerObject, x)
2804static PyMemberDef scanner_members[] = {
2805 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2806 {NULL} /* Sentinel */
2807};
2808
2809static PyTypeObject Scanner_Type = {
2810 PyVarObject_HEAD_INIT(NULL, 0)
2811 "_" SRE_MODULE ".SRE_Scanner",
2812 sizeof(ScannerObject), 0,
2813 (destructor)scanner_dealloc,/* tp_dealloc */
2814 0, /* tp_print */
2815 0, /* tp_getattr */
2816 0, /* tp_setattr */
2817 0, /* tp_reserved */
2818 0, /* tp_repr */
2819 0, /* tp_as_number */
2820 0, /* tp_as_sequence */
2821 0, /* tp_as_mapping */
2822 0, /* tp_hash */
2823 0, /* tp_call */
2824 0, /* tp_str */
2825 0, /* tp_getattro */
2826 0, /* tp_setattro */
2827 0, /* tp_as_buffer */
2828 Py_TPFLAGS_DEFAULT, /* tp_flags */
2829 0, /* tp_doc */
2830 0, /* tp_traverse */
2831 0, /* tp_clear */
2832 0, /* tp_richcompare */
2833 0, /* tp_weaklistoffset */
2834 0, /* tp_iter */
2835 0, /* tp_iternext */
2836 scanner_methods, /* tp_methods */
2837 scanner_members, /* tp_members */
2838 0, /* tp_getset */
2839};
2840
Guido van Rossumb700df92000-03-31 14:59:30 +00002841static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002842 _SRE_COMPILE_METHODDEF
2843 _SRE_GETCODESIZE_METHODDEF
2844 _SRE_GETLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002845 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002846};
2847
Martin v. Löwis1a214512008-06-11 05:26:20 +00002848static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002849 PyModuleDef_HEAD_INIT,
2850 "_" SRE_MODULE,
2851 NULL,
2852 -1,
2853 _functions,
2854 NULL,
2855 NULL,
2856 NULL,
2857 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002858};
2859
2860PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002861{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002862 PyObject* m;
2863 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002864 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002865
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002866 /* Patch object types */
2867 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2868 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002869 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002870
Martin v. Löwis1a214512008-06-11 05:26:20 +00002871 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002872 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002873 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002874 d = PyModule_GetDict(m);
2875
Christian Heimes217cfd12007-12-02 14:31:20 +00002876 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002877 if (x) {
2878 PyDict_SetItemString(d, "MAGIC", x);
2879 Py_DECREF(x);
2880 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002881
Christian Heimes217cfd12007-12-02 14:31:20 +00002882 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002883 if (x) {
2884 PyDict_SetItemString(d, "CODESIZE", x);
2885 Py_DECREF(x);
2886 }
2887
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002888 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2889 if (x) {
2890 PyDict_SetItemString(d, "MAXREPEAT", x);
2891 Py_DECREF(x);
2892 }
2893
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002894 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2895 if (x) {
2896 PyDict_SetItemString(d, "MAXGROUPS", x);
2897 Py_DECREF(x);
2898 }
2899
Neal Norwitzfe537132007-08-26 03:55:15 +00002900 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002901 if (x) {
2902 PyDict_SetItemString(d, "copyright", x);
2903 Py_DECREF(x);
2904 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002905 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002906}
2907
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002908/* vim:ts=4:sw=4:et
2909*/