blob: fb0ab033c502dfab287c29c6d2ccbd133128016b [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
/* Version and copyright banner string for this engine. */
static const char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
/* name of this module, minus the leading underscore
   (may be overridden by defining SRE_MODULE before this point) */
#if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif

/* module name string for the Python-level counterpart */
#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000065/* enables copy/deepcopy handling (work in progress) */
66#undef USE_BUILTIN_COPY
67
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000068/* -------------------------------------------------------------------- */
69
/* LOCAL(type) declares a file-local function returning `type`, picking the
   storage/inline/calling-convention keywords according to the compiler. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
80
/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000087
/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and expands
   to nothing otherwise; note the doubled parentheses at the call site. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
93
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000094/* -------------------------------------------------------------------- */
95/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000096
/* ASCII-only character predicates: anything >= 128 is never a digit,
   space, alphanumeric or word character.  Each macro evaluates `ch`
   more than once, so pass an expression without side effects. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch)\
    ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +0000107
/* Lowercase `ch` with Py_TOLOWER when it is below 128; larger values are
   returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
}
112
/* Uppercase `ch` with Py_TOUPPER when it is below 128; larger values are
   returned unchanged. */
static unsigned int sre_upper(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOUPPER(ch) : ch);
}
117
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* Values with any bit above the low 8 set are never alphanumeric; the
   rest are classified by the C library's isalnum(). */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
123
/* Lowercase `ch` with the C library's tolower() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}
128
/* Uppercase `ch` with the C library's toupper() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
}
133
/* unicode-specific character predicates */

/* Thin aliases for the Py_UNICODE_* classification macros; a "word"
   character is an alphanumeric or an underscore. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000141
/* Lowercase `ch` via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER(ch);
}
146
/* Uppercase `ch` via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOUPPER(ch);
}
151
Guido van Rossumb700df92000-03-31 14:59:30 +0000152LOCAL(int)
153sre_category(SRE_CODE category, unsigned int ch)
154{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000155 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000157 case SRE_CATEGORY_DIGIT:
158 return SRE_IS_DIGIT(ch);
159 case SRE_CATEGORY_NOT_DIGIT:
160 return !SRE_IS_DIGIT(ch);
161 case SRE_CATEGORY_SPACE:
162 return SRE_IS_SPACE(ch);
163 case SRE_CATEGORY_NOT_SPACE:
164 return !SRE_IS_SPACE(ch);
165 case SRE_CATEGORY_WORD:
166 return SRE_IS_WORD(ch);
167 case SRE_CATEGORY_NOT_WORD:
168 return !SRE_IS_WORD(ch);
169 case SRE_CATEGORY_LINEBREAK:
170 return SRE_IS_LINEBREAK(ch);
171 case SRE_CATEGORY_NOT_LINEBREAK:
172 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000174 case SRE_CATEGORY_LOC_WORD:
175 return SRE_LOC_IS_WORD(ch);
176 case SRE_CATEGORY_LOC_NOT_WORD:
177 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000178
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000179 case SRE_CATEGORY_UNI_DIGIT:
180 return SRE_UNI_IS_DIGIT(ch);
181 case SRE_CATEGORY_UNI_NOT_DIGIT:
182 return !SRE_UNI_IS_DIGIT(ch);
183 case SRE_CATEGORY_UNI_SPACE:
184 return SRE_UNI_IS_SPACE(ch);
185 case SRE_CATEGORY_UNI_NOT_SPACE:
186 return !SRE_UNI_IS_SPACE(ch);
187 case SRE_CATEGORY_UNI_WORD:
188 return SRE_UNI_IS_WORD(ch);
189 case SRE_CATEGORY_UNI_NOT_WORD:
190 return !SRE_UNI_IS_WORD(ch);
191 case SRE_CATEGORY_UNI_LINEBREAK:
192 return SRE_UNI_IS_LINEBREAK(ch);
193 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
194 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 }
196 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000197}
198
199/* helpers */
200
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000201static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000202data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000203{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000204 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000205 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000206 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000207 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000208 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000209}
210
211static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000212data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000213{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000214 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000215 minsize = state->data_stack_base+size;
216 cursize = state->data_stack_size;
217 if (cursize < minsize) {
218 void* stack;
219 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300220 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000222 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000223 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000224 return SRE_ERROR_MEMORY;
225 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000226 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000227 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000228 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000229 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000230}
231
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000232/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000233
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300234#define SRE_CHAR Py_UCS1
235#define SIZEOF_SRE_CHAR 1
236#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300237#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000238
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300239/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000240
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300241#define SRE_CHAR Py_UCS2
242#define SIZEOF_SRE_CHAR 2
243#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300244#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000245
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300246/* generate 32-bit unicode version */
247
248#define SRE_CHAR Py_UCS4
249#define SIZEOF_SRE_CHAR 4
250#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300251#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000252
253/* -------------------------------------------------------------------- */
254/* factories and destructors */
255
256/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100257static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300258static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000259
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300260
/*[clinic input]
module _sre
class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
class _sre.SRE_Match "MatchObject *" "&Match_Type"
class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
268
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700269static PyTypeObject Pattern_Type;
270static PyTypeObject Match_Type;
271static PyTypeObject Scanner_Type;
272
/*[clinic input]
_sre.getcodesize -> int
[clinic start generated code]*/
276
277static int
278_sre_getcodesize_impl(PyModuleDef *module)
279/*[clinic end generated code: output=794f1f98ef4883e5 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000280{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300281 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000282}
283
/*[clinic input]
_sre.getlower -> int

    character: int
    flags: int
    /

[clinic start generated code]*/
292
293static int
294_sre_getlower_impl(PyModuleDef *module, int character, int flags)
295/*[clinic end generated code: output=5fc3616ae2a4c306 input=087d2f1c44bbca6f]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000296{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000297 if (flags & SRE_FLAG_LOCALE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300298 return sre_lower_locale(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000299 if (flags & SRE_FLAG_UNICODE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300300 return sre_lower_unicode(character);
301 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000302}
303
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000304LOCAL(void)
305state_reset(SRE_STATE* state)
306{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000308 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000309
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000310 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000311 state->lastindex = -1;
312
313 state->repeat = NULL;
314
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000315 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000316}
317
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000318static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300320 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600321 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000322{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000323 /* given a python object, return a data pointer, a length (in
324 characters), and a character size. return NULL if the object
325 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000326
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000327 /* Unicode objects do not support the buffer API. So, get the data
328 directly instead. */
329 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200330 if (PyUnicode_READY(string) == -1)
331 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200332 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200333 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300334 *p_isbytes = 0;
335 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000336 }
337
Victor Stinner0058b862011-09-29 03:27:47 +0200338 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300339 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200340 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300341 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000342 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 *p_length = view->len;
345 *p_charsize = 1;
346 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000347
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300348 if (view->buf == NULL) {
349 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
350 PyBuffer_Release(view);
351 view->buf = NULL;
352 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300354 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000355}
356
357LOCAL(PyObject*)
358state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000359 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000360{
361 /* prepare state object */
362
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000363 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300364 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000365 void* ptr;
366
367 memset(state, 0, sizeof(SRE_STATE));
368
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300369 state->mark = PyMem_New(void *, pattern->groups * 2);
370 if (!state->mark) {
371 PyErr_NoMemory();
372 goto err;
373 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000374 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000375 state->lastindex = -1;
376
Benjamin Petersone48944b2012-03-07 14:50:25 -0600377 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300378 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000379 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600380 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000381
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600383 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200384 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600385 goto err;
386 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300387 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600388 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200389 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600390 goto err;
391 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 /* adjust boundaries */
394 if (start < 0)
395 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000396 else if (start > length)
397 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 if (end < 0)
400 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000401 else if (end > length)
402 end = length;
403
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300404 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000405 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 state->start = (void*) ((char*) ptr + start * state->charsize);
410 state->end = (void*) ((char*) ptr + end * state->charsize);
411
412 Py_INCREF(string);
413 state->string = string;
414 state->pos = start;
415 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000416
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200417 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000418 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200419 state->upper = sre_upper_locale;
420 }
421 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000422 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200423 state->upper = sre_upper_unicode;
424 }
425 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000426 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200427 state->upper = sre_upper;
428 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600431 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300432 PyMem_Del(state->mark);
433 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600434 if (state->buffer.buf)
435 PyBuffer_Release(&state->buffer);
436 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000437}
438
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000439LOCAL(void)
440state_fini(SRE_STATE* state)
441{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600442 if (state->buffer.buf)
443 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000445 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300446 PyMem_Del(state->mark);
447 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000448}
449
/* calculate offset from start of string: converts the pointer `member`
   into a character index by dividing its byte distance from
   (state)->beginning by (state)->charsize */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
453
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000454LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300455getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300456 PyObject* string, Py_ssize_t start, Py_ssize_t end)
457{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300458 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300459 if (PyBytes_CheckExact(string) &&
460 start == 0 && end == PyBytes_GET_SIZE(string)) {
461 Py_INCREF(string);
462 return string;
463 }
464 return PyBytes_FromStringAndSize(
465 (const char *)ptr + start, end - start);
466 }
467 else {
468 return PyUnicode_Substring(string, start, end);
469 }
470}
471
472LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000473state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000474{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000475 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000476
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000477 index = (index - 1) * 2;
478
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000479 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000480 if (empty)
481 /* want empty string */
482 i = j = 0;
483 else {
484 Py_INCREF(Py_None);
485 return Py_None;
486 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000487 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000488 i = STATE_OFFSET(state, state->mark[index]);
489 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000490 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000491
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300492 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000493}
494
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000495static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100496pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000497{
498 switch (status) {
499 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400500 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000501 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400502 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000503 "maximum recursion limit exceeded"
504 );
505 break;
506 case SRE_ERROR_MEMORY:
507 PyErr_NoMemory();
508 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000509 case SRE_ERROR_INTERRUPTED:
510 /* An exception has already been raised, so let it fly */
511 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000512 default:
513 /* other error codes indicate compiler/engine bugs */
514 PyErr_SetString(
515 PyExc_RuntimeError,
516 "internal error in regular expression engine"
517 );
518 }
519}
520
Guido van Rossumb700df92000-03-31 14:59:30 +0000521static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000522pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000523{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000524 if (self->weakreflist != NULL)
525 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000526 Py_XDECREF(self->pattern);
527 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000528 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000529 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000530}
531
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300532LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300533sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300534{
535 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300536 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300537 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300538 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300539 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300540 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300541}
542
543LOCAL(Py_ssize_t)
544sre_search(SRE_STATE* state, SRE_CODE* pattern)
545{
546 if (state->charsize == 1)
547 return sre_ucs1_search(state, pattern);
548 if (state->charsize == 2)
549 return sre_ucs2_search(state, pattern);
550 assert(state->charsize == 4);
551 return sre_ucs4_search(state, pattern);
552}
553
Larry Hastings16c51912014-01-07 11:53:01 -0800554static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200555fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
556{
557 if (string2 != NULL) {
558 if (string != NULL) {
559 PyErr_Format(PyExc_TypeError,
560 "Argument given by name ('%s') and position (1)",
561 oldname);
562 return NULL;
563 }
564 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
565 "The '%s' keyword parameter name is deprecated. "
566 "Use 'string' instead.", oldname) < 0)
567 return NULL;
568 return string2;
569 }
570 if (string == NULL) {
571 PyErr_SetString(PyExc_TypeError,
572 "Required argument 'string' (pos 1) not found");
573 return NULL;
574 }
575 return string;
576}
Larry Hastings16c51912014-01-07 11:53:01 -0800577
/*[clinic input]
_sre.SRE_Pattern.match

    string: object = NULL
    pos: Py_ssize_t = 0
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    *
    pattern: object = NULL

Matches zero or more characters at the beginning of the string.
[clinic start generated code]*/
589
Larry Hastings16c51912014-01-07 11:53:01 -0800590static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300591_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
592 Py_ssize_t pos, Py_ssize_t endpos,
593 PyObject *pattern)
594/*[clinic end generated code: output=74b4b1da3bb2d84e input=3d079aa99979b81d]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800595{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000596 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100597 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300598 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000599
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200600 string = fix_string_param(string, pattern, "pattern");
601 if (!string)
602 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300603 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000604 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000606 state.ptr = state.start;
607
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000608 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
609
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300610 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000611
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000612 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300613 if (PyErr_Occurred()) {
614 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000615 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300616 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000617
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300618 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000619 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300620 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000621}
622
/*[clinic input]
_sre.SRE_Pattern.fullmatch

    string: object = NULL
    pos: Py_ssize_t = 0
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    *
    pattern: object = NULL

Matches against all of the string
[clinic start generated code]*/
634
635static PyObject *
636_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
637 Py_ssize_t pos, Py_ssize_t endpos,
638 PyObject *pattern)
639/*[clinic end generated code: output=1c98bc5da744ea94 input=d4228606cc12580f]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200640{
641 SRE_STATE state;
642 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300643 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200644
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300645 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200646 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200647 return NULL;
648
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300649 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200650 return NULL;
651
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200652 state.ptr = state.start;
653
654 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
655
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300656 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200657
658 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300659 if (PyErr_Occurred()) {
660 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200661 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300662 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200663
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300664 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200665 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300666 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200667}
668
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300669/*[clinic input]
670_sre.SRE_Pattern.search
671
672 string: object = NULL
673 pos: Py_ssize_t = 0
674 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
675 *
676 pattern: object = NULL
677
678Scan through string looking for a match, and return a corresponding match object instance.
679
680Return None if no position in the string matches.
681[clinic start generated code]*/
682
683static PyObject *
684_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
685 Py_ssize_t pos, Py_ssize_t endpos,
686 PyObject *pattern)
687/*[clinic end generated code: output=3839394a18e5ea4f input=dab42720f4be3a4b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000688{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000689 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100690 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300691 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000692
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300693 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200694 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000696
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300697 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000698 return NULL;
699
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000700 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
701
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300702 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000703
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000704 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
705
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300706 if (PyErr_Occurred()) {
707 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000708 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300709 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000710
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300711 match = pattern_new_match(self, &state, status);
712 state_fini(&state);
713 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000714}
715
716static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200717call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000718{
719 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000720 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721 PyObject* func;
722 PyObject* result;
723
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000724 if (!args)
725 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000726 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000727 if (!name)
728 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000729 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000730 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000731 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000732 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000733 func = PyObject_GetAttrString(mod, function);
734 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000735 if (!func)
736 return NULL;
737 result = PyObject_CallObject(func, args);
738 Py_DECREF(func);
739 Py_DECREF(args);
740 return result;
741}
742
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
   Returns 1 on success; returns 0 and leaves *object untouched when the
   call yields NULL. */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* call_args;
    PyObject* duplicate;

    /* a NULL tuple is passed straight through; call() returns NULL for it */
    call_args = PyTuple_Pack(2, *object, memo);
    duplicate = call("copy", "deepcopy", call_args);
    if (duplicate == NULL) {
        return 0;
    }

    Py_SETREF(*object, duplicate);
    return 1; /* success */
}
#endif
761
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300762/*[clinic input]
763_sre.SRE_Pattern.findall
764
765 string: object = NULL
766 pos: Py_ssize_t = 0
767 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
768 *
769 source: object = NULL
770
771Return a list of all non-overlapping matches of pattern in string.
772[clinic start generated code]*/
773
774static PyObject *
775_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
776 Py_ssize_t pos, Py_ssize_t endpos,
777 PyObject *source)
778/*[clinic end generated code: output=51295498b300639d input=df688355c056b9de]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000779{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000780 SRE_STATE state;
781 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100782 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000783 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000784
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300785 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200786 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000787 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000788
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300789 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000790 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000791
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000792 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000793 if (!list) {
794 state_fini(&state);
795 return NULL;
796 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000797
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000798 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000799
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000800 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000801
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000802 state_reset(&state);
803
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000804 state.ptr = state.start;
805
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300806 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300807 if (PyErr_Occurred())
808 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000809
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000810 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000811 if (status == 0)
812 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000813 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000814 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000815 }
Tim Peters3d563502006-01-21 02:47:53 +0000816
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000817 /* don't bother to build a match object */
818 switch (self->groups) {
819 case 0:
820 b = STATE_OFFSET(&state, state.start);
821 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300822 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300823 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000824 if (!item)
825 goto error;
826 break;
827 case 1:
828 item = state_getslice(&state, 1, string, 1);
829 if (!item)
830 goto error;
831 break;
832 default:
833 item = PyTuple_New(self->groups);
834 if (!item)
835 goto error;
836 for (i = 0; i < self->groups; i++) {
837 PyObject* o = state_getslice(&state, i+1, string, 1);
838 if (!o) {
839 Py_DECREF(item);
840 goto error;
841 }
842 PyTuple_SET_ITEM(item, i, o);
843 }
844 break;
845 }
846
847 status = PyList_Append(list, item);
848 Py_DECREF(item);
849 if (status < 0)
850 goto error;
851
852 if (state.ptr == state.start)
853 state.start = (void*) ((char*) state.ptr + state.charsize);
854 else
855 state.start = state.ptr;
856
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000857 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000858
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000859 state_fini(&state);
860 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000861
862error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000863 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000864 state_fini(&state);
865 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000866
Guido van Rossumb700df92000-03-31 14:59:30 +0000867}
868
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300869/*[clinic input]
870_sre.SRE_Pattern.finditer
871
872 string: object
873 pos: Py_ssize_t = 0
874 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
875
876Return an iterator over all non-overlapping matches for the RE pattern in string.
877
878For each match, the iterator returns a match object.
879[clinic start generated code]*/
880
881static PyObject *
882_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
883 Py_ssize_t pos, Py_ssize_t endpos)
884/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000885{
886 PyObject* scanner;
887 PyObject* search;
888 PyObject* iterator;
889
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300890 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000891 if (!scanner)
892 return NULL;
893
894 search = PyObject_GetAttrString(scanner, "search");
895 Py_DECREF(scanner);
896 if (!search)
897 return NULL;
898
899 iterator = PyCallIter_New(search, Py_None);
900 Py_DECREF(search);
901
902 return iterator;
903}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000904
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300905/*[clinic input]
906_sre.SRE_Pattern.scanner
907
908 string: object
909 pos: Py_ssize_t = 0
910 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
911
912[clinic start generated code]*/
913
914static PyObject *
915_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
916 Py_ssize_t pos, Py_ssize_t endpos)
917/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
918{
919 return pattern_scanner(self, string, pos, endpos);
920}
921
922/*[clinic input]
923_sre.SRE_Pattern.split
924
925 string: object = NULL
926 maxsplit: Py_ssize_t = 0
927 *
928 source: object = NULL
929
930Split string by the occurrences of pattern.
931[clinic start generated code]*/
932
933static PyObject *
934_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
935 Py_ssize_t maxsplit, PyObject *source)
936/*[clinic end generated code: output=20bac2ff55b9f84c input=41e0b2e35e599d7b]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000937{
938 SRE_STATE state;
939 PyObject* list;
940 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100941 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000942 Py_ssize_t n;
943 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000944 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000945
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300946 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200947 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000948 return NULL;
949
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200950 assert(self->codesize != 0);
951 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
952 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
953 PyErr_SetString(PyExc_ValueError,
954 "split() requires a non-empty pattern match.");
955 return NULL;
956 }
957 if (PyErr_WarnEx(PyExc_FutureWarning,
958 "split() requires a non-empty pattern match.",
959 1) < 0)
960 return NULL;
961 }
962
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300963 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000964 return NULL;
965
966 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000967 if (!list) {
968 state_fini(&state);
969 return NULL;
970 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000971
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000972 n = 0;
973 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000974
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000975 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000976
977 state_reset(&state);
978
979 state.ptr = state.start;
980
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300981 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300982 if (PyErr_Occurred())
983 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000984
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000985 if (status <= 0) {
986 if (status == 0)
987 break;
988 pattern_error(status);
989 goto error;
990 }
Tim Peters3d563502006-01-21 02:47:53 +0000991
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000992 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300993 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000994 break;
995 /* skip one character */
996 state.start = (void*) ((char*) state.ptr + state.charsize);
997 continue;
998 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000999
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001000 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001001 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001002 string, STATE_OFFSET(&state, last),
1003 STATE_OFFSET(&state, state.start)
1004 );
1005 if (!item)
1006 goto error;
1007 status = PyList_Append(list, item);
1008 Py_DECREF(item);
1009 if (status < 0)
1010 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001011
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001012 /* add groups (if any) */
1013 for (i = 0; i < self->groups; i++) {
1014 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001015 if (!item)
1016 goto error;
1017 status = PyList_Append(list, item);
1018 Py_DECREF(item);
1019 if (status < 0)
1020 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001021 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001022
1023 n = n + 1;
1024
1025 last = state.start = state.ptr;
1026
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001027 }
1028
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001029 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001030 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001031 string, STATE_OFFSET(&state, last), state.endpos
1032 );
1033 if (!item)
1034 goto error;
1035 status = PyList_Append(list, item);
1036 Py_DECREF(item);
1037 if (status < 0)
1038 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001039
1040 state_fini(&state);
1041 return list;
1042
1043error:
1044 Py_DECREF(list);
1045 state_fini(&state);
1046 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001047
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001048}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001049
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001050static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001051pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001052 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001053{
1054 SRE_STATE state;
1055 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001056 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001057 PyObject* item;
1058 PyObject* filter;
1059 PyObject* args;
1060 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001061 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001062 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001063 Py_ssize_t n;
1064 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001065 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001066 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001067 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001068
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001069 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001070 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001071 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001072 Py_INCREF(filter);
1073 filter_is_callable = 1;
1074 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001075 /* if not callable, check if it's a literal string */
1076 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001077 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001078 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001079 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001080 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001081 if (charsize == 1)
1082 literal = memchr(ptr, '\\', n) == NULL;
1083 else
1084 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001085 } else {
1086 PyErr_Clear();
1087 literal = 0;
1088 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001089 if (view.buf)
1090 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001091 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001092 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001093 Py_INCREF(filter);
1094 filter_is_callable = 0;
1095 } else {
1096 /* not a literal; hand it over to the template compiler */
1097 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001098 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001099 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001100 );
1101 if (!filter)
1102 return NULL;
1103 filter_is_callable = PyCallable_Check(filter);
1104 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001105 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001106
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001107 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001108 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001109 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001110 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001111
1112 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001113 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001114 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001115 state_fini(&state);
1116 return NULL;
1117 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001118
1119 n = i = 0;
1120
1121 while (!count || n < count) {
1122
1123 state_reset(&state);
1124
1125 state.ptr = state.start;
1126
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001127 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001128 if (PyErr_Occurred())
1129 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001130
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001131 if (status <= 0) {
1132 if (status == 0)
1133 break;
1134 pattern_error(status);
1135 goto error;
1136 }
Tim Peters3d563502006-01-21 02:47:53 +00001137
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001138 b = STATE_OFFSET(&state, state.start);
1139 e = STATE_OFFSET(&state, state.ptr);
1140
1141 if (i < b) {
1142 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001143 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001144 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001145 if (!item)
1146 goto error;
1147 status = PyList_Append(list, item);
1148 Py_DECREF(item);
1149 if (status < 0)
1150 goto error;
1151
1152 } else if (i == b && i == e && n > 0)
1153 /* ignore empty match on latest position */
1154 goto next;
1155
1156 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001157 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001158 match = pattern_new_match(self, &state, 1);
1159 if (!match)
1160 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001161 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001162 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001163 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001164 goto error;
1165 }
1166 item = PyObject_CallObject(filter, args);
1167 Py_DECREF(args);
1168 Py_DECREF(match);
1169 if (!item)
1170 goto error;
1171 } else {
1172 /* filter is literal string */
1173 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001174 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001175 }
1176
1177 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001178 if (item != Py_None) {
1179 status = PyList_Append(list, item);
1180 Py_DECREF(item);
1181 if (status < 0)
1182 goto error;
1183 }
Tim Peters3d563502006-01-21 02:47:53 +00001184
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001185 i = e;
1186 n = n + 1;
1187
1188next:
1189 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001190 if (state.ptr == state.end)
1191 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001192 if (state.ptr == state.start)
1193 state.start = (void*) ((char*) state.ptr + state.charsize);
1194 else
1195 state.start = state.ptr;
1196
1197 }
1198
1199 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001200 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001201 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001202 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001203 if (!item)
1204 goto error;
1205 status = PyList_Append(list, item);
1206 Py_DECREF(item);
1207 if (status < 0)
1208 goto error;
1209 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001210
1211 state_fini(&state);
1212
Guido van Rossum4e173842001-12-07 04:25:10 +00001213 Py_DECREF(filter);
1214
Fredrik Lundhdac58492001-10-21 21:48:30 +00001215 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001216 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001217 if (!joiner) {
1218 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001219 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001220 }
1221 if (PyList_GET_SIZE(list) == 0) {
1222 Py_DECREF(list);
1223 item = joiner;
1224 }
1225 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001226 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001227 item = _PyBytes_Join(joiner, list);
1228 else
1229 item = PyUnicode_Join(joiner, list);
1230 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001231 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001232 if (!item)
1233 return NULL;
1234 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001235
1236 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001237 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001238
1239 return item;
1240
1241error:
1242 Py_DECREF(list);
1243 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001244 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001245 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001246
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001247}
1248
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001249/*[clinic input]
1250_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001251
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001252 repl: object
1253 string: object
1254 count: Py_ssize_t = 0
1255
1256Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1257[clinic start generated code]*/
1258
1259static PyObject *
1260_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1261 PyObject *string, Py_ssize_t count)
1262/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1263{
1264 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001265}
1266
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001267/*[clinic input]
1268_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001269
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001270 repl: object
1271 string: object
1272 count: Py_ssize_t = 0
1273
1274Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1275[clinic start generated code]*/
1276
1277static PyObject *
1278_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1279 PyObject *string, Py_ssize_t count)
1280/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1281{
1282 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001283}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001284
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001285/*[clinic input]
1286_sre.SRE_Pattern.__copy__
1287
1288[clinic start generated code]*/
1289
1290static PyObject *
1291_sre_SRE_Pattern___copy___impl(PatternObject *self)
1292/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001293{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001294#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001295 PatternObject* copy;
1296 int offset;
1297
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001298 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1299 if (!copy)
1300 return NULL;
1301
1302 offset = offsetof(PatternObject, groups);
1303
1304 Py_XINCREF(self->groupindex);
1305 Py_XINCREF(self->indexgroup);
1306 Py_XINCREF(self->pattern);
1307
1308 memcpy((char*) copy + offset, (char*) self + offset,
1309 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001310 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001311
1312 return (PyObject*) copy;
1313#else
1314 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1315 return NULL;
1316#endif
1317}
1318
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001319/*[clinic input]
1320_sre.SRE_Pattern.__deepcopy__
1321
1322 memo: object
1323
1324[clinic start generated code]*/
1325
1326static PyObject *
1327_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1328/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001329{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001330#ifdef USE_BUILTIN_COPY
1331 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001332
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001333 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001334 if (!copy)
1335 return NULL;
1336
1337 if (!deepcopy(&copy->groupindex, memo) ||
1338 !deepcopy(&copy->indexgroup, memo) ||
1339 !deepcopy(&copy->pattern, memo)) {
1340 Py_DECREF(copy);
1341 return NULL;
1342 }
1343
1344#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001345 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1346 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001347#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001348}
1349
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001350static PyObject *
1351pattern_repr(PatternObject *obj)
1352{
1353 static const struct {
1354 const char *name;
1355 int value;
1356 } flag_names[] = {
1357 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1358 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1359 {"re.LOCALE", SRE_FLAG_LOCALE},
1360 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1361 {"re.DOTALL", SRE_FLAG_DOTALL},
1362 {"re.UNICODE", SRE_FLAG_UNICODE},
1363 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1364 {"re.DEBUG", SRE_FLAG_DEBUG},
1365 {"re.ASCII", SRE_FLAG_ASCII},
1366 };
1367 PyObject *result = NULL;
1368 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001369 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001370 int flags = obj->flags;
1371
1372 /* Omit re.UNICODE for valid string patterns. */
1373 if (obj->isbytes == 0 &&
1374 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1375 SRE_FLAG_UNICODE)
1376 flags &= ~SRE_FLAG_UNICODE;
1377
1378 flag_items = PyList_New(0);
1379 if (!flag_items)
1380 return NULL;
1381
1382 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1383 if (flags & flag_names[i].value) {
1384 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1385 if (!item)
1386 goto done;
1387
1388 if (PyList_Append(flag_items, item) < 0) {
1389 Py_DECREF(item);
1390 goto done;
1391 }
1392 Py_DECREF(item);
1393 flags &= ~flag_names[i].value;
1394 }
1395 }
1396 if (flags) {
1397 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1398 if (!item)
1399 goto done;
1400
1401 if (PyList_Append(flag_items, item) < 0) {
1402 Py_DECREF(item);
1403 goto done;
1404 }
1405 Py_DECREF(item);
1406 }
1407
1408 if (PyList_Size(flag_items) > 0) {
1409 PyObject *flags_result;
1410 PyObject *sep = PyUnicode_FromString("|");
1411 if (!sep)
1412 goto done;
1413 flags_result = PyUnicode_Join(sep, flag_items);
1414 Py_DECREF(sep);
1415 if (!flags_result)
1416 goto done;
1417 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1418 obj->pattern, flags_result);
1419 Py_DECREF(flags_result);
1420 }
1421 else {
1422 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1423 }
1424
1425done:
1426 Py_DECREF(flag_items);
1427 return result;
1428}
1429
Raymond Hettinger94478742004-09-24 04:31:19 +00001430PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1431
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001432/* PatternObject's 'groupindex' method. */
1433static PyObject *
1434pattern_groupindex(PatternObject *self)
1435{
1436 return PyDictProxy_New(self->groupindex);
1437}
1438
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001439static int _validate(PatternObject *self); /* Forward */
1440
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001441/*[clinic input]
1442_sre.compile
1443
1444 pattern: object
1445 flags: int
1446 code: object(subclass_of='&PyList_Type')
1447 groups: Py_ssize_t
1448 groupindex: object
1449 indexgroup: object
1450
1451[clinic start generated code]*/
1452
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001453static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001454_sre_compile_impl(PyModuleDef *module, PyObject *pattern, int flags,
1455 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1456 PyObject *indexgroup)
1457/*[clinic end generated code: output=3004b293730bf309 input=7d059ec8ae1edb85]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001458{
1459 /* "compile" pattern descriptor to pattern object */
1460
1461 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001462 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001463
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001464 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001465 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001466 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1467 if (!self)
1468 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001469 self->weakreflist = NULL;
1470 self->pattern = NULL;
1471 self->groupindex = NULL;
1472 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001473
1474 self->codesize = n;
1475
1476 for (i = 0; i < n; i++) {
1477 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001478 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001479 self->code[i] = (SRE_CODE) value;
1480 if ((unsigned long) self->code[i] != value) {
1481 PyErr_SetString(PyExc_OverflowError,
1482 "regular expression code size limit exceeded");
1483 break;
1484 }
1485 }
1486
1487 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001488 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001489 return NULL;
1490 }
1491
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001492 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001493 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001494 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001495 else {
1496 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001497 int charsize;
1498 Py_buffer view;
1499 view.buf = NULL;
1500 if (!getstring(pattern, &p_length, &self->isbytes,
1501 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001502 Py_DECREF(self);
1503 return NULL;
1504 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001505 if (view.buf)
1506 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001507 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001508
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001509 Py_INCREF(pattern);
1510 self->pattern = pattern;
1511
1512 self->flags = flags;
1513
1514 self->groups = groups;
1515
1516 Py_XINCREF(groupindex);
1517 self->groupindex = groupindex;
1518
1519 Py_XINCREF(indexgroup);
1520 self->indexgroup = indexgroup;
1521
1522 self->weakreflist = NULL;
1523
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001524 if (!_validate(self)) {
1525 Py_DECREF(self);
1526 return NULL;
1527 }
1528
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001529 return (PyObject*) self;
1530}
1531
Guido van Rossumb700df92000-03-31 14:59:30 +00001532/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001533/* Code validation */
1534
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1556
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf argument list: VTRACE(("fmt", args...)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the *enclosing function* return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the enclosing function: they read
   and advance 'code', compare it against 'end', and store into 'op',
   'arg' or 'skip' respectively.  Each one FAILs if 'code' has already
   reached 'end'. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL unless (skip - adj) stays within the words
   remaining from the skip word up to 'end'.  'adj' is parenthesized so
   that an expression argument expands correctly. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001597
1598static int
1599_validate_charset(SRE_CODE *code, SRE_CODE *end)
1600{
1601 /* Some variables are manipulated by the macros above */
1602 SRE_CODE op;
1603 SRE_CODE arg;
1604 SRE_CODE offset;
1605 int i;
1606
1607 while (code < end) {
1608 GET_OP;
1609 switch (op) {
1610
1611 case SRE_OP_NEGATE:
1612 break;
1613
1614 case SRE_OP_LITERAL:
1615 GET_ARG;
1616 break;
1617
1618 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001619 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001620 GET_ARG;
1621 GET_ARG;
1622 break;
1623
1624 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001625 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001626 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001627 FAIL;
1628 code += offset;
1629 break;
1630
1631 case SRE_OP_BIGCHARSET:
1632 GET_ARG; /* Number of blocks */
1633 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001634 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001635 FAIL;
1636 /* Make sure that each byte points to a valid block */
1637 for (i = 0; i < 256; i++) {
1638 if (((unsigned char *)code)[i] >= arg)
1639 FAIL;
1640 }
1641 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001642 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001643 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001644 FAIL;
1645 code += offset;
1646 break;
1647
1648 case SRE_OP_CATEGORY:
1649 GET_ARG;
1650 switch (arg) {
1651 case SRE_CATEGORY_DIGIT:
1652 case SRE_CATEGORY_NOT_DIGIT:
1653 case SRE_CATEGORY_SPACE:
1654 case SRE_CATEGORY_NOT_SPACE:
1655 case SRE_CATEGORY_WORD:
1656 case SRE_CATEGORY_NOT_WORD:
1657 case SRE_CATEGORY_LINEBREAK:
1658 case SRE_CATEGORY_NOT_LINEBREAK:
1659 case SRE_CATEGORY_LOC_WORD:
1660 case SRE_CATEGORY_LOC_NOT_WORD:
1661 case SRE_CATEGORY_UNI_DIGIT:
1662 case SRE_CATEGORY_UNI_NOT_DIGIT:
1663 case SRE_CATEGORY_UNI_SPACE:
1664 case SRE_CATEGORY_UNI_NOT_SPACE:
1665 case SRE_CATEGORY_UNI_WORD:
1666 case SRE_CATEGORY_UNI_NOT_WORD:
1667 case SRE_CATEGORY_UNI_LINEBREAK:
1668 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1669 break;
1670 default:
1671 FAIL;
1672 }
1673 break;
1674
1675 default:
1676 FAIL;
1677
1678 }
1679 }
1680
1681 return 1;
1682}
1683
1684static int
1685_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1686{
1687 /* Some variables are manipulated by the macros above */
1688 SRE_CODE op;
1689 SRE_CODE arg;
1690 SRE_CODE skip;
1691
1692 VTRACE(("code=%p, end=%p\n", code, end));
1693
1694 if (code > end)
1695 FAIL;
1696
1697 while (code < end) {
1698 GET_OP;
1699 switch (op) {
1700
1701 case SRE_OP_MARK:
1702 /* We don't check whether marks are properly nested; the
1703 sre_match() code is robust even if they don't, and the worst
1704 you can get is nonsensical match results. */
1705 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001706 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001707 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1708 FAIL;
1709 }
1710 break;
1711
1712 case SRE_OP_LITERAL:
1713 case SRE_OP_NOT_LITERAL:
1714 case SRE_OP_LITERAL_IGNORE:
1715 case SRE_OP_NOT_LITERAL_IGNORE:
1716 GET_ARG;
1717 /* The arg is just a character, nothing to check */
1718 break;
1719
1720 case SRE_OP_SUCCESS:
1721 case SRE_OP_FAILURE:
1722 /* Nothing to check; these normally end the matching process */
1723 break;
1724
1725 case SRE_OP_AT:
1726 GET_ARG;
1727 switch (arg) {
1728 case SRE_AT_BEGINNING:
1729 case SRE_AT_BEGINNING_STRING:
1730 case SRE_AT_BEGINNING_LINE:
1731 case SRE_AT_END:
1732 case SRE_AT_END_LINE:
1733 case SRE_AT_END_STRING:
1734 case SRE_AT_BOUNDARY:
1735 case SRE_AT_NON_BOUNDARY:
1736 case SRE_AT_LOC_BOUNDARY:
1737 case SRE_AT_LOC_NON_BOUNDARY:
1738 case SRE_AT_UNI_BOUNDARY:
1739 case SRE_AT_UNI_NON_BOUNDARY:
1740 break;
1741 default:
1742 FAIL;
1743 }
1744 break;
1745
1746 case SRE_OP_ANY:
1747 case SRE_OP_ANY_ALL:
1748 /* These have no operands */
1749 break;
1750
1751 case SRE_OP_IN:
1752 case SRE_OP_IN_IGNORE:
1753 GET_SKIP;
1754 /* Stop 1 before the end; we check the FAILURE below */
1755 if (!_validate_charset(code, code+skip-2))
1756 FAIL;
1757 if (code[skip-2] != SRE_OP_FAILURE)
1758 FAIL;
1759 code += skip-1;
1760 break;
1761
1762 case SRE_OP_INFO:
1763 {
1764 /* A minimal info field is
1765 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1766 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1767 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001768 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001769 SRE_CODE *newcode;
1770 GET_SKIP;
1771 newcode = code+skip-1;
1772 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001773 GET_ARG;
1774 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001775 /* Check that only valid flags are present */
1776 if ((flags & ~(SRE_INFO_PREFIX |
1777 SRE_INFO_LITERAL |
1778 SRE_INFO_CHARSET)) != 0)
1779 FAIL;
1780 /* PREFIX and CHARSET are mutually exclusive */
1781 if ((flags & SRE_INFO_PREFIX) &&
1782 (flags & SRE_INFO_CHARSET))
1783 FAIL;
1784 /* LITERAL implies PREFIX */
1785 if ((flags & SRE_INFO_LITERAL) &&
1786 !(flags & SRE_INFO_PREFIX))
1787 FAIL;
1788 /* Validate the prefix */
1789 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001790 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001791 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001792 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001793 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001794 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001795 FAIL;
1796 code += prefix_len;
1797 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001798 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001799 FAIL;
1800 /* Each overlap value should be < prefix_len */
1801 for (i = 0; i < prefix_len; i++) {
1802 if (code[i] >= prefix_len)
1803 FAIL;
1804 }
1805 code += prefix_len;
1806 }
1807 /* Validate the charset */
1808 if (flags & SRE_INFO_CHARSET) {
1809 if (!_validate_charset(code, newcode-1))
1810 FAIL;
1811 if (newcode[-1] != SRE_OP_FAILURE)
1812 FAIL;
1813 code = newcode;
1814 }
1815 else if (code != newcode) {
1816 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1817 FAIL;
1818 }
1819 }
1820 break;
1821
1822 case SRE_OP_BRANCH:
1823 {
1824 SRE_CODE *target = NULL;
1825 for (;;) {
1826 GET_SKIP;
1827 if (skip == 0)
1828 break;
1829 /* Stop 2 before the end; we check the JUMP below */
1830 if (!_validate_inner(code, code+skip-3, groups))
1831 FAIL;
1832 code += skip-3;
1833 /* Check that it ends with a JUMP, and that each JUMP
1834 has the same target */
1835 GET_OP;
1836 if (op != SRE_OP_JUMP)
1837 FAIL;
1838 GET_SKIP;
1839 if (target == NULL)
1840 target = code+skip-1;
1841 else if (code+skip-1 != target)
1842 FAIL;
1843 }
1844 }
1845 break;
1846
1847 case SRE_OP_REPEAT_ONE:
1848 case SRE_OP_MIN_REPEAT_ONE:
1849 {
1850 SRE_CODE min, max;
1851 GET_SKIP;
1852 GET_ARG; min = arg;
1853 GET_ARG; max = arg;
1854 if (min > max)
1855 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001856 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001857 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001858 if (!_validate_inner(code, code+skip-4, groups))
1859 FAIL;
1860 code += skip-4;
1861 GET_OP;
1862 if (op != SRE_OP_SUCCESS)
1863 FAIL;
1864 }
1865 break;
1866
1867 case SRE_OP_REPEAT:
1868 {
1869 SRE_CODE min, max;
1870 GET_SKIP;
1871 GET_ARG; min = arg;
1872 GET_ARG; max = arg;
1873 if (min > max)
1874 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001875 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001876 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001877 if (!_validate_inner(code, code+skip-3, groups))
1878 FAIL;
1879 code += skip-3;
1880 GET_OP;
1881 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1882 FAIL;
1883 }
1884 break;
1885
1886 case SRE_OP_GROUPREF:
1887 case SRE_OP_GROUPREF_IGNORE:
1888 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001889 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001890 FAIL;
1891 break;
1892
1893 case SRE_OP_GROUPREF_EXISTS:
1894 /* The regex syntax for this is: '(?(group)then|else)', where
1895 'group' is either an integer group number or a group name,
1896 'then' and 'else' are sub-regexes, and 'else' is optional. */
1897 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001898 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001899 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001900 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001901 code--; /* The skip is relative to the first arg! */
1902 /* There are two possibilities here: if there is both a 'then'
1903 part and an 'else' part, the generated code looks like:
1904
1905 GROUPREF_EXISTS
1906 <group>
1907 <skipyes>
1908 ...then part...
1909 JUMP
1910 <skipno>
1911 (<skipyes> jumps here)
1912 ...else part...
1913 (<skipno> jumps here)
1914
1915 If there is only a 'then' part, it looks like:
1916
1917 GROUPREF_EXISTS
1918 <group>
1919 <skip>
1920 ...then part...
1921 (<skip> jumps here)
1922
1923 There is no direct way to decide which it is, and we don't want
1924 to allow arbitrary jumps anywhere in the code; so we just look
1925 for a JUMP opcode preceding our skip target.
1926 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001927 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001928 code[skip-3] == SRE_OP_JUMP)
1929 {
1930 VTRACE(("both then and else parts present\n"));
1931 if (!_validate_inner(code+1, code+skip-3, groups))
1932 FAIL;
1933 code += skip-2; /* Position after JUMP, at <skipno> */
1934 GET_SKIP;
1935 if (!_validate_inner(code, code+skip-1, groups))
1936 FAIL;
1937 code += skip-1;
1938 }
1939 else {
1940 VTRACE(("only a then part present\n"));
1941 if (!_validate_inner(code+1, code+skip-1, groups))
1942 FAIL;
1943 code += skip-1;
1944 }
1945 break;
1946
1947 case SRE_OP_ASSERT:
1948 case SRE_OP_ASSERT_NOT:
1949 GET_SKIP;
1950 GET_ARG; /* 0 for lookahead, width for lookbehind */
1951 code--; /* Back up over arg to simplify math below */
1952 if (arg & 0x80000000)
1953 FAIL; /* Width too large */
1954 /* Stop 1 before the end; we check the SUCCESS below */
1955 if (!_validate_inner(code+1, code+skip-2, groups))
1956 FAIL;
1957 code += skip-2;
1958 GET_OP;
1959 if (op != SRE_OP_SUCCESS)
1960 FAIL;
1961 break;
1962
1963 default:
1964 FAIL;
1965
1966 }
1967 }
1968
1969 VTRACE(("okay\n"));
1970 return 1;
1971}
1972
1973static int
1974_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1975{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001976 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1977 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001978 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001979 return _validate_inner(code, end-1, groups);
1980}
1981
1982static int
1983_validate(PatternObject *self)
1984{
1985 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1986 {
1987 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1988 return 0;
1989 }
1990 else
1991 VTRACE(("Success!\n"));
1992 return 1;
1993}
1994
1995/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001996/* match methods */
1997
1998static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001999match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002000{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002001 Py_XDECREF(self->regs);
2002 Py_XDECREF(self->string);
2003 Py_DECREF(self->pattern);
2004 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002005}
2006
2007static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002008match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002009{
Serhiy Storchaka25324972013-10-16 12:46:28 +03002010 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002011 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03002012 Py_buffer view;
2013 PyObject *result;
2014 void* ptr;
2015
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002016 if (index < 0 || index >= self->groups) {
2017 /* raise IndexError if we were given a bad group number */
2018 PyErr_SetString(
2019 PyExc_IndexError,
2020 "no such group"
2021 );
2022 return NULL;
2023 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002024
Fredrik Lundh6f013982000-07-03 18:44:21 +00002025 index *= 2;
2026
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002027 if (self->string == Py_None || self->mark[index] < 0) {
2028 /* return default value if the string or group is undefined */
2029 Py_INCREF(def);
2030 return def;
2031 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002032
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002033 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03002034 if (ptr == NULL)
2035 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002036 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03002037 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002038 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002039 PyBuffer_Release(&view);
2040 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002041}
2042
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002043static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002044match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002045{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002046 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002047
Guido van Rossumddefaf32007-01-14 03:31:43 +00002048 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002049 /* Default value */
2050 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002051
Christian Heimes217cfd12007-12-02 14:31:20 +00002052 if (PyLong_Check(index))
2053 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002054
Fredrik Lundh6f013982000-07-03 18:44:21 +00002055 i = -1;
2056
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002057 if (self->pattern->groupindex) {
2058 index = PyObject_GetItem(self->pattern->groupindex, index);
2059 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002060 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002061 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002062 Py_DECREF(index);
2063 } else
2064 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002065 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002066
2067 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002068}
2069
2070static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002071match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002072{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002073 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002074}
2075
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002076/*[clinic input]
2077_sre.SRE_Match.expand
2078
2079 template: object
2080
2081Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2082[clinic start generated code]*/
2083
2084static PyObject *
2085_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2086/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002087{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002088 /* delegate to Python code */
2089 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002090 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002091 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002092 );
2093}
2094
2095static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002096match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002097{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002098 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002099 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002100
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002101 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002102
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002103 switch (size) {
2104 case 0:
2105 result = match_getslice(self, Py_False, Py_None);
2106 break;
2107 case 1:
2108 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2109 break;
2110 default:
2111 /* fetch multiple items */
2112 result = PyTuple_New(size);
2113 if (!result)
2114 return NULL;
2115 for (i = 0; i < size; i++) {
2116 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002117 self, PyTuple_GET_ITEM(args, i), Py_None
2118 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002119 if (!item) {
2120 Py_DECREF(result);
2121 return NULL;
2122 }
2123 PyTuple_SET_ITEM(result, i, item);
2124 }
2125 break;
2126 }
2127 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002128}
2129
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002130/*[clinic input]
2131_sre.SRE_Match.groups
2132
2133 default: object = None
2134 Is used for groups that did not participate in the match.
2135
2136Return a tuple containing all the subgroups of the match, from 1.
2137[clinic start generated code]*/
2138
2139static PyObject *
2140_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2141/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002142{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002143 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002144 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002145
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002146 result = PyTuple_New(self->groups-1);
2147 if (!result)
2148 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002149
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002150 for (index = 1; index < self->groups; index++) {
2151 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002152 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002153 if (!item) {
2154 Py_DECREF(result);
2155 return NULL;
2156 }
2157 PyTuple_SET_ITEM(result, index-1, item);
2158 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002160 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002161}
2162
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002163/*[clinic input]
2164_sre.SRE_Match.groupdict
2165
2166 default: object = None
2167 Is used for groups that did not participate in the match.
2168
2169Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2170[clinic start generated code]*/
2171
2172static PyObject *
2173_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2174/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002175{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002176 PyObject* result;
2177 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002178 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002179
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002180 result = PyDict_New();
2181 if (!result || !self->pattern->groupindex)
2182 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002183
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002184 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002185 if (!keys)
2186 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002187
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002188 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002189 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002190 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002191 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002192 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002193 if (!key)
2194 goto failed;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002195 value = match_getslice(self, key, default_value);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002196 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002197 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002198 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002199 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002200 status = PyDict_SetItem(result, key, value);
2201 Py_DECREF(value);
2202 if (status < 0)
2203 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002204 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002205
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002206 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002207
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002208 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002209
2210failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002211 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002212 Py_DECREF(result);
2213 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002214}
2215
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002216/*[clinic input]
2217_sre.SRE_Match.start -> Py_ssize_t
2218
2219 group: object(c_default="NULL") = 0
2220 /
2221
2222Return index of the start of the substring matched by group.
2223[clinic start generated code]*/
2224
2225static Py_ssize_t
2226_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2227/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002228{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002229 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002230
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002231 if (index < 0 || index >= self->groups) {
2232 PyErr_SetString(
2233 PyExc_IndexError,
2234 "no such group"
2235 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002236 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002237 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002238
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002239 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002240 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002241}
2242
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002243/*[clinic input]
2244_sre.SRE_Match.end -> Py_ssize_t
2245
2246 group: object(c_default="NULL") = 0
2247 /
2248
2249Return index of the end of the substring matched by group.
2250[clinic start generated code]*/
2251
2252static Py_ssize_t
2253_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2254/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002255{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002256 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002257
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002258 if (index < 0 || index >= self->groups) {
2259 PyErr_SetString(
2260 PyExc_IndexError,
2261 "no such group"
2262 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002263 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002264 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002265
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002266 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002267 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002268}
2269
2270LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002271_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002272{
2273 PyObject* pair;
2274 PyObject* item;
2275
2276 pair = PyTuple_New(2);
2277 if (!pair)
2278 return NULL;
2279
Christian Heimes217cfd12007-12-02 14:31:20 +00002280 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002281 if (!item)
2282 goto error;
2283 PyTuple_SET_ITEM(pair, 0, item);
2284
Christian Heimes217cfd12007-12-02 14:31:20 +00002285 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002286 if (!item)
2287 goto error;
2288 PyTuple_SET_ITEM(pair, 1, item);
2289
2290 return pair;
2291
2292 error:
2293 Py_DECREF(pair);
2294 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002295}
2296
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002297/*[clinic input]
2298_sre.SRE_Match.span
2299
2300 group: object(c_default="NULL") = 0
2301 /
2302
2303For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2304[clinic start generated code]*/
2305
2306static PyObject *
2307_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2308/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002309{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002310 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002311
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002312 if (index < 0 || index >= self->groups) {
2313 PyErr_SetString(
2314 PyExc_IndexError,
2315 "no such group"
2316 );
2317 return NULL;
2318 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002319
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002320 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002321 return _pair(self->mark[index*2], self->mark[index*2+1]);
2322}
2323
2324static PyObject*
2325match_regs(MatchObject* self)
2326{
2327 PyObject* regs;
2328 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002329 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002330
2331 regs = PyTuple_New(self->groups);
2332 if (!regs)
2333 return NULL;
2334
2335 for (index = 0; index < self->groups; index++) {
2336 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2337 if (!item) {
2338 Py_DECREF(regs);
2339 return NULL;
2340 }
2341 PyTuple_SET_ITEM(regs, index, item);
2342 }
2343
2344 Py_INCREF(regs);
2345 self->regs = regs;
2346
2347 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002348}
2349
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002350/*[clinic input]
2351_sre.SRE_Match.__copy__
2352
2353[clinic start generated code]*/
2354
2355static PyObject *
2356_sre_SRE_Match___copy___impl(MatchObject *self)
2357/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002358{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002359#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002360 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002361 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002362
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002363 slots = 2 * (self->pattern->groups+1);
2364
2365 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2366 if (!copy)
2367 return NULL;
2368
2369 /* this value a constant, but any compiler should be able to
2370 figure that out all by itself */
2371 offset = offsetof(MatchObject, string);
2372
2373 Py_XINCREF(self->pattern);
2374 Py_XINCREF(self->string);
2375 Py_XINCREF(self->regs);
2376
2377 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002378 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002379
2380 return (PyObject*) copy;
2381#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002382 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002383 return NULL;
2384#endif
2385}
2386
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002387/*[clinic input]
2388_sre.SRE_Match.__deepcopy__
2389
2390 memo: object
2391
2392[clinic start generated code]*/
2393
2394static PyObject *
2395_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2396/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002397{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002398#ifdef USE_BUILTIN_COPY
2399 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002400
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002401 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002402 if (!copy)
2403 return NULL;
2404
2405 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2406 !deepcopy(&copy->string, memo) ||
2407 !deepcopy(&copy->regs, memo)) {
2408 Py_DECREF(copy);
2409 return NULL;
2410 }
2411
2412#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002413 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2414 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002415#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002416}
2417
/* Docstring of the match type itself (installed as tp_doc of Match_Type). */
PyDoc_STRVAR(match_doc,
"The result of re.match() and re.search().\n\
Match objects always have a boolean value of True.");

/* Docstring of the group() method (used by the "group" entry of
   match_methods). */
PyDoc_STRVAR(match_group_doc,
"group([group1, ...]) -> str or tuple.\n\
 Return subgroup(s) of the match by indices or names.\n\
 For 0 returns the entire match.");
2426
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002427static PyObject *
2428match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002429{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002430 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002431 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002432 Py_INCREF(Py_None);
2433 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002434}
2435
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002436static PyObject *
2437match_lastgroup_get(MatchObject *self)
2438{
2439 if (self->pattern->indexgroup && self->lastindex >= 0) {
2440 PyObject* result = PySequence_GetItem(
2441 self->pattern->indexgroup, self->lastindex
2442 );
2443 if (result)
2444 return result;
2445 PyErr_Clear();
2446 }
2447 Py_INCREF(Py_None);
2448 return Py_None;
2449}
2450
2451static PyObject *
2452match_regs_get(MatchObject *self)
2453{
2454 if (self->regs) {
2455 Py_INCREF(self->regs);
2456 return self->regs;
2457 } else
2458 return match_regs(self);
2459}
2460
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002461static PyObject *
2462match_repr(MatchObject *self)
2463{
2464 PyObject *result;
2465 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2466 if (group0 == NULL)
2467 return NULL;
2468 result = PyUnicode_FromFormat(
2469 "<%s object; span=(%d, %d), match=%.50R>",
2470 Py_TYPE(self)->tp_name,
2471 self->mark[0], self->mark[1], group0);
2472 Py_DECREF(group0);
2473 return result;
2474}
2475
2476
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002477static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002478pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002479{
2480 /* create match object (from state object) */
2481
2482 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002483 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002484 char* base;
2485 int n;
2486
2487 if (status > 0) {
2488
2489 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002490 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002491 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2492 2*(pattern->groups+1));
2493 if (!match)
2494 return NULL;
2495
2496 Py_INCREF(pattern);
2497 match->pattern = pattern;
2498
2499 Py_INCREF(state->string);
2500 match->string = state->string;
2501
2502 match->regs = NULL;
2503 match->groups = pattern->groups+1;
2504
2505 /* fill in group slices */
2506
2507 base = (char*) state->beginning;
2508 n = state->charsize;
2509
2510 match->mark[0] = ((char*) state->start - base) / n;
2511 match->mark[1] = ((char*) state->ptr - base) / n;
2512
2513 for (i = j = 0; i < pattern->groups; i++, j+=2)
2514 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2515 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2516 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2517 } else
2518 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2519
2520 match->pos = state->pos;
2521 match->endpos = state->endpos;
2522
2523 match->lastindex = state->lastindex;
2524
2525 return (PyObject*) match;
2526
2527 } else if (status == 0) {
2528
2529 /* no match */
2530 Py_INCREF(Py_None);
2531 return Py_None;
2532
2533 }
2534
2535 /* internal error */
2536 pattern_error(status);
2537 return NULL;
2538}
2539
2540
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002541/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002542/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002543
2544static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002545scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002546{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002547 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002548 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002549 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002550}
2551
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002552/*[clinic input]
2553_sre.SRE_Scanner.match
2554
2555[clinic start generated code]*/
2556
2557static PyObject *
2558_sre_SRE_Scanner_match_impl(ScannerObject *self)
2559/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002560{
2561 SRE_STATE* state = &self->state;
2562 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002563 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002564
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002565 if (state->start == NULL)
2566 Py_RETURN_NONE;
2567
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002568 state_reset(state);
2569
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002570 state->ptr = state->start;
2571
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002572 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002573 if (PyErr_Occurred())
2574 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002575
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002576 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002577 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002578
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002579 if (status == 0)
2580 state->start = NULL;
2581 else if (state->ptr != state->start)
2582 state->start = state->ptr;
2583 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002584 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002585 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002586 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002587
2588 return match;
2589}
2590
2591
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002592/*[clinic input]
2593_sre.SRE_Scanner.search
2594
2595[clinic start generated code]*/
2596
2597static PyObject *
2598_sre_SRE_Scanner_search_impl(ScannerObject *self)
2599/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002600{
2601 SRE_STATE* state = &self->state;
2602 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002603 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002604
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002605 if (state->start == NULL)
2606 Py_RETURN_NONE;
2607
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002608 state_reset(state);
2609
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002610 state->ptr = state->start;
2611
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002612 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002613 if (PyErr_Occurred())
2614 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002615
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002616 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002617 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002618
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002619 if (status == 0)
2620 state->start = NULL;
2621 else if (state->ptr != state->start)
2622 state->start = state->ptr;
2623 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002624 state->start = (void*) ((char*) state->ptr + state->charsize);
2625 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002626 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002627
2628 return match;
2629}
2630
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002631static PyObject *
2632pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002633{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002634 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002635
2636 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002637 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2638 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002639 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002640 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002641
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002642 /* create search state object */
2643 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2644 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002645 return NULL;
2646 }
2647
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002648 Py_INCREF(self);
2649 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002650
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002651 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002652}
2653
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002654#include "clinic/_sre.c.h"
2655
/* Method table of the pattern type (installed as tp_methods of
   Pattern_Type).  Each entry is a *_METHODDEF macro; presumably these
   come from the clinic/_sre.c.h header included just above. */
static PyMethodDef pattern_methods[] = {
    _SRE_SRE_PATTERN_MATCH_METHODDEF
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
    _SRE_SRE_PATTERN_SUB_METHODDEF
    _SRE_SRE_PATTERN_SUBN_METHODDEF
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
    _SRE_SRE_PATTERN___COPY___METHODDEF
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2670
/* Computed attributes of the pattern type (tp_getset of Pattern_Type):
   "groupindex" is read-only (no setter) and served by
   pattern_groupindex(). */
static PyGetSetDef pattern_getset[] = {
    {"groupindex", (getter)pattern_groupindex, (setter)NULL,
      "A dictionary mapping group names to group numbers."},
    {NULL}  /* Sentinel */
};
2676
/* Read-only attributes of the pattern type that map directly onto
   PatternObject fields (tp_members of Pattern_Type). */
#define PAT_OFF(x) offsetof(PatternObject, x)
static PyMemberDef pattern_members[] = {
    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
    {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
    {NULL}  /* Sentinel */
};
2684
/* Type object for pattern objects.  Instances are variable-sized, with
   items of sizeof(SRE_CODE) bytes each, and can be weakly referenced
   through PatternObject.weakreflist. */
static PyTypeObject Pattern_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_" SRE_MODULE ".SRE_Pattern",      /* tp_name */
    sizeof(PatternObject), sizeof(SRE_CODE),    /* tp_basicsize, tp_itemsize */
    (destructor)pattern_dealloc,        /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    (reprfunc)pattern_repr,             /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    pattern_doc,                        /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    pattern_methods,                    /* tp_methods */
    pattern_members,                    /* tp_members */
    pattern_getset,                     /* tp_getset */
};
2716
2717
/* Method table of the match type (installed as tp_methods of
   Match_Type).  "group" is declared by hand as a METH_VARARGS method;
   the remaining entries are *_METHODDEF macros. */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
    _SRE_SRE_MATCH_START_METHODDEF
    _SRE_SRE_MATCH_END_METHODDEF
    _SRE_SRE_MATCH_SPAN_METHODDEF
    _SRE_SRE_MATCH_GROUPS_METHODDEF
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
    _SRE_SRE_MATCH_EXPAND_METHODDEF
    _SRE_SRE_MATCH___COPY___METHODDEF
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2730
/* Computed, read-only (no setter) attributes of the match type
   (tp_getset of Match_Type). */
static PyGetSetDef match_getset[] = {
    {"lastindex", (getter)match_lastindex_get, (setter)NULL},
    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
    {"regs",      (getter)match_regs_get,      (setter)NULL},
    {NULL}  /* Sentinel */
};
2737
/* Read-only attributes of the match type that map directly onto
   MatchObject fields (tp_members of Match_Type).  Note that the
   attribute named "re" exposes the `pattern` field. */
#define MATCH_OFF(x) offsetof(MatchObject, x)
static PyMemberDef match_members[] = {
    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY},
    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY},
    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY},
    {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY},
    {NULL}  /* Sentinel */
};
2746
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2749
/* Type object for match objects.  Instances are variable-sized, with
   items of sizeof(Py_ssize_t) bytes each (pattern_new_match() allocates
   2*(groups+1) of them for the group marks). */
static PyTypeObject Match_Type = {
    PyVarObject_HEAD_INIT(NULL,0)
    "_" SRE_MODULE ".SRE_Match",        /* tp_name */
    sizeof(MatchObject), sizeof(Py_ssize_t),    /* tp_basicsize, tp_itemsize */
    (destructor)match_dealloc,  /* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_reserved */
    (reprfunc)match_repr,       /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    match_doc,                  /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    match_methods,              /* tp_methods */
    match_members,              /* tp_members */
    match_getset,               /* tp_getset */
};
2781
/* Method table of the scanner type (installed as tp_methods of
   Scanner_Type): match() and search(), both declared through
   *_METHODDEF macros. */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2787
/* Read-only attributes of the scanner type (tp_members of Scanner_Type):
   "pattern" exposes the ScannerObject field of the same name. */
#define SCAN_OFF(x) offsetof(ScannerObject, x)
static PyMemberDef scanner_members[] = {
    {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
    {NULL}  /* Sentinel */
};
2793
/* Type object for scanner objects (fixed size: the item size is 0).
   It provides no repr and no docstring of its own. */
static PyTypeObject Scanner_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_" SRE_MODULE ".SRE_Scanner",      /* tp_name */
    sizeof(ScannerObject), 0,   /* tp_basicsize, tp_itemsize */
    (destructor)scanner_dealloc,/* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_reserved */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    0,                          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    scanner_methods,            /* tp_methods */
    scanner_members,            /* tp_members */
    0,                          /* tp_getset */
};
2825
/* Module-level functions of the extension module (referenced from
   sremodule below), declared through *_METHODDEF macros. */
static PyMethodDef _functions[] = {
    _SRE_COMPILE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF
    _SRE_GETLOWER_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2832
/* Module definition handed to PyModule_Create() in PyInit__sre(). */
static struct PyModuleDef sremodule = {
        PyModuleDef_HEAD_INIT,
        "_" SRE_MODULE,         /* m_name */
        NULL,                   /* m_doc */
        -1,                     /* m_size */
        _functions,             /* m_methods */
        NULL,
        NULL,
        NULL,
        NULL
};
2844
2845PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002846{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002847 PyObject* m;
2848 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002849 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002850
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002851 /* Patch object types */
2852 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2853 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002854 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002855
Martin v. Löwis1a214512008-06-11 05:26:20 +00002856 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002857 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002858 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002859 d = PyModule_GetDict(m);
2860
Christian Heimes217cfd12007-12-02 14:31:20 +00002861 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002862 if (x) {
2863 PyDict_SetItemString(d, "MAGIC", x);
2864 Py_DECREF(x);
2865 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002866
Christian Heimes217cfd12007-12-02 14:31:20 +00002867 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002868 if (x) {
2869 PyDict_SetItemString(d, "CODESIZE", x);
2870 Py_DECREF(x);
2871 }
2872
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002873 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2874 if (x) {
2875 PyDict_SetItemString(d, "MAXREPEAT", x);
2876 Py_DECREF(x);
2877 }
2878
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002879 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2880 if (x) {
2881 PyDict_SetItemString(d, "MAXGROUPS", x);
2882 Py_DECREF(x);
2883 }
2884
Neal Norwitzfe537132007-08-26 03:55:15 +00002885 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002886 if (x) {
2887 PyDict_SetItemString(d, "copyright", x);
2888 Py_DECREF(x);
2889 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002890 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002891}
2892
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002893/* vim:ts=4:sw=4:et
2894*/