/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000038static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000039 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000065/* enables copy/deepcopy handling (work in progress) */
66#undef USE_BUILTIN_COPY
67
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000068/* -------------------------------------------------------------------- */
69
Fredrik Lundh80946112000-06-29 18:03:25 +000070#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000071#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000072#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000073/* fastest possible local call under MSVC */
74#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000075#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000076#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000077#else
78#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000079#endif
80
81/* error codes */
82#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000083#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000084#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000085#define SRE_ERROR_MEMORY -9 /* out of memory */
Christian Heimes2380ac72008-01-09 00:17:24 +000086#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000087
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000088#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000089#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000090#else
91#define TRACE(v)
92#endif
93
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000094/* -------------------------------------------------------------------- */
95/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000096
/* ASCII-only character predicates: values of 128 and above never match. */
#define SRE_IS_DIGIT(ch) ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) < 128 && Py_ISSPACE(ch))
/* only a bare newline counts as a line break here */
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_ALNUM(ch) ((ch) < 128 && Py_ISALNUM(ch))
/* word characters are the ASCII alphanumerics plus underscore */
#define SRE_IS_WORD(ch) ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +0000107
/* Lower-case an ASCII character; values of 128 and above are returned
   unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
112
/* Upper-case an ASCII character; values of 128 and above are returned
   unchanged. */
static unsigned int sre_upper(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOUPPER(ch);
}
117
/* locale-specific character predicates */
/* For any unsigned c, (c & ~N) is zero exactly when c < N+1.  Testing the
 * mask instead of comparing avoids compiler warnings when the type of c
 * can only hold numbers < N+1. */
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
123
/* Lower-case a character using the C library's tolower(); values of 256
   and above are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower(ch);
}
128
/* Upper-case a character using the C library's toupper(); values of 256
   and above are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper(ch);
}
133
/* unicode-specific character predicates (thin aliases for the
   Py_UNICODE_* classification macros) */

#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
/* word characters are the alphanumerics plus underscore */
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000141
/* Lower-case a code point via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
146
/* Upper-case a code point via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
151
Guido van Rossumb700df92000-03-31 14:59:30 +0000152LOCAL(int)
153sre_category(SRE_CODE category, unsigned int ch)
154{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000155 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000157 case SRE_CATEGORY_DIGIT:
158 return SRE_IS_DIGIT(ch);
159 case SRE_CATEGORY_NOT_DIGIT:
160 return !SRE_IS_DIGIT(ch);
161 case SRE_CATEGORY_SPACE:
162 return SRE_IS_SPACE(ch);
163 case SRE_CATEGORY_NOT_SPACE:
164 return !SRE_IS_SPACE(ch);
165 case SRE_CATEGORY_WORD:
166 return SRE_IS_WORD(ch);
167 case SRE_CATEGORY_NOT_WORD:
168 return !SRE_IS_WORD(ch);
169 case SRE_CATEGORY_LINEBREAK:
170 return SRE_IS_LINEBREAK(ch);
171 case SRE_CATEGORY_NOT_LINEBREAK:
172 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000174 case SRE_CATEGORY_LOC_WORD:
175 return SRE_LOC_IS_WORD(ch);
176 case SRE_CATEGORY_LOC_NOT_WORD:
177 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000178
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000179 case SRE_CATEGORY_UNI_DIGIT:
180 return SRE_UNI_IS_DIGIT(ch);
181 case SRE_CATEGORY_UNI_NOT_DIGIT:
182 return !SRE_UNI_IS_DIGIT(ch);
183 case SRE_CATEGORY_UNI_SPACE:
184 return SRE_UNI_IS_SPACE(ch);
185 case SRE_CATEGORY_UNI_NOT_SPACE:
186 return !SRE_UNI_IS_SPACE(ch);
187 case SRE_CATEGORY_UNI_WORD:
188 return SRE_UNI_IS_WORD(ch);
189 case SRE_CATEGORY_UNI_NOT_WORD:
190 return !SRE_UNI_IS_WORD(ch);
191 case SRE_CATEGORY_UNI_LINEBREAK:
192 return SRE_UNI_IS_LINEBREAK(ch);
193 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
194 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 }
196 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000197}
198
199/* helpers */
200
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000201static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000202data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000203{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000204 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000205 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000206 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000207 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000208 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000209}
210
211static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000212data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000213{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000214 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000215 minsize = state->data_stack_base+size;
216 cursize = state->data_stack_size;
217 if (cursize < minsize) {
218 void* stack;
219 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300220 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000222 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000223 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000224 return SRE_ERROR_MEMORY;
225 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000226 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000227 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000228 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000229 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000230}
231
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000232/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000233
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300234#define SRE_CHAR Py_UCS1
235#define SIZEOF_SRE_CHAR 1
236#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300237#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000238
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300239/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000240
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300241#define SRE_CHAR Py_UCS2
242#define SIZEOF_SRE_CHAR 2
243#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300244#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000245
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300246/* generate 32-bit unicode version */
247
248#define SRE_CHAR Py_UCS4
249#define SIZEOF_SRE_CHAR 4
250#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300251#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000252
253/* -------------------------------------------------------------------- */
254/* factories and destructors */
255
256/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100257static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300258static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000259
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300260
261/*[clinic input]
262module _sre
263class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
264class _sre.SRE_Match "MatchObject *" "&Match_Type"
265class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
266[clinic start generated code]*/
267/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
268
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700269static PyTypeObject Pattern_Type;
270static PyTypeObject Match_Type;
271static PyTypeObject Scanner_Type;
272
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300273/*[clinic input]
274_sre.getcodesize -> int
275[clinic start generated code]*/
276
277static int
278_sre_getcodesize_impl(PyModuleDef *module)
279/*[clinic end generated code: output=794f1f98ef4883e5 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000280{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300281 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000282}
283
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300284/*[clinic input]
285_sre.getlower -> int
286
287 character: int
288 flags: int
289 /
290
291[clinic start generated code]*/
292
293static int
294_sre_getlower_impl(PyModuleDef *module, int character, int flags)
295/*[clinic end generated code: output=5fc3616ae2a4c306 input=087d2f1c44bbca6f]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000296{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000297 if (flags & SRE_FLAG_LOCALE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300298 return sre_lower_locale(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000299 if (flags & SRE_FLAG_UNICODE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300300 return sre_lower_unicode(character);
301 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000302}
303
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000304LOCAL(void)
305state_reset(SRE_STATE* state)
306{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000308 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000309
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000310 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000311 state->lastindex = -1;
312
313 state->repeat = NULL;
314
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000315 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000316}
317
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000318static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300320 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600321 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000322{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000323 /* given a python object, return a data pointer, a length (in
324 characters), and a character size. return NULL if the object
325 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000326
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000327 /* Unicode objects do not support the buffer API. So, get the data
328 directly instead. */
329 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200330 if (PyUnicode_READY(string) == -1)
331 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200332 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200333 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300334 *p_isbytes = 0;
335 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000336 }
337
Victor Stinner0058b862011-09-29 03:27:47 +0200338 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300339 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200340 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300341 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000342 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 *p_length = view->len;
345 *p_charsize = 1;
346 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000347
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300348 if (view->buf == NULL) {
349 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
350 PyBuffer_Release(view);
351 view->buf = NULL;
352 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300354 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000355}
356
357LOCAL(PyObject*)
358state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000359 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000360{
361 /* prepare state object */
362
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000363 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300364 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000365 void* ptr;
366
367 memset(state, 0, sizeof(SRE_STATE));
368
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300369 state->mark = PyMem_New(void *, pattern->groups * 2);
370 if (!state->mark) {
371 PyErr_NoMemory();
372 goto err;
373 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000374 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000375 state->lastindex = -1;
376
Benjamin Petersone48944b2012-03-07 14:50:25 -0600377 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300378 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000379 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600380 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000381
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600383 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200384 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600385 goto err;
386 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300387 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600388 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200389 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600390 goto err;
391 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 /* adjust boundaries */
394 if (start < 0)
395 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000396 else if (start > length)
397 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 if (end < 0)
400 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000401 else if (end > length)
402 end = length;
403
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300404 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000405 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 state->start = (void*) ((char*) ptr + start * state->charsize);
410 state->end = (void*) ((char*) ptr + end * state->charsize);
411
412 Py_INCREF(string);
413 state->string = string;
414 state->pos = start;
415 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000416
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200417 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000418 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200419 state->upper = sre_upper_locale;
420 }
421 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000422 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200423 state->upper = sre_upper_unicode;
424 }
425 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000426 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200427 state->upper = sre_upper;
428 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600431 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300432 PyMem_Del(state->mark);
433 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600434 if (state->buffer.buf)
435 PyBuffer_Release(&state->buffer);
436 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000437}
438
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000439LOCAL(void)
440state_fini(SRE_STATE* state)
441{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600442 if (state->buffer.buf)
443 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000445 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300446 PyMem_Del(state->mark);
447 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000448}
449
/* Convert a pointer into the subject (`member`) to a character offset from
   the start of the string: byte distance divided by the character size. */
#define STATE_OFFSET(state, member) \
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
453
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000454LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300455getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300456 PyObject* string, Py_ssize_t start, Py_ssize_t end)
457{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300458 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300459 if (PyBytes_CheckExact(string) &&
460 start == 0 && end == PyBytes_GET_SIZE(string)) {
461 Py_INCREF(string);
462 return string;
463 }
464 return PyBytes_FromStringAndSize(
465 (const char *)ptr + start, end - start);
466 }
467 else {
468 return PyUnicode_Substring(string, start, end);
469 }
470}
471
472LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000473state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000474{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000475 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000476
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000477 index = (index - 1) * 2;
478
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000479 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000480 if (empty)
481 /* want empty string */
482 i = j = 0;
483 else {
484 Py_INCREF(Py_None);
485 return Py_None;
486 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000487 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000488 i = STATE_OFFSET(state, state->mark[index]);
489 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000490 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000491
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300492 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000493}
494
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000495static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100496pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000497{
498 switch (status) {
499 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400500 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000501 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400502 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000503 "maximum recursion limit exceeded"
504 );
505 break;
506 case SRE_ERROR_MEMORY:
507 PyErr_NoMemory();
508 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000509 case SRE_ERROR_INTERRUPTED:
510 /* An exception has already been raised, so let it fly */
511 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000512 default:
513 /* other error codes indicate compiler/engine bugs */
514 PyErr_SetString(
515 PyExc_RuntimeError,
516 "internal error in regular expression engine"
517 );
518 }
519}
520
Guido van Rossumb700df92000-03-31 14:59:30 +0000521static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000522pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000523{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000524 if (self->weakreflist != NULL)
525 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000526 Py_XDECREF(self->pattern);
527 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000528 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000529 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000530}
531
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300532LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300533sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300534{
535 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300536 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300537 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300538 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300539 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300540 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300541}
542
543LOCAL(Py_ssize_t)
544sre_search(SRE_STATE* state, SRE_CODE* pattern)
545{
546 if (state->charsize == 1)
547 return sre_ucs1_search(state, pattern);
548 if (state->charsize == 2)
549 return sre_ucs2_search(state, pattern);
550 assert(state->charsize == 4);
551 return sre_ucs4_search(state, pattern);
552}
553
Larry Hastings16c51912014-01-07 11:53:01 -0800554static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200555fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
556{
557 if (string2 != NULL) {
558 if (string != NULL) {
559 PyErr_Format(PyExc_TypeError,
560 "Argument given by name ('%s') and position (1)",
561 oldname);
562 return NULL;
563 }
564 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
565 "The '%s' keyword parameter name is deprecated. "
566 "Use 'string' instead.", oldname) < 0)
567 return NULL;
568 return string2;
569 }
570 if (string == NULL) {
571 PyErr_SetString(PyExc_TypeError,
572 "Required argument 'string' (pos 1) not found");
573 return NULL;
574 }
575 return string;
576}
Larry Hastings16c51912014-01-07 11:53:01 -0800577
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300578/*[clinic input]
579_sre.SRE_Pattern.match
580
581 string: object = NULL
582 pos: Py_ssize_t = 0
583 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
584 *
585 pattern: object = NULL
586
587Matches zero or more characters at the beginning of the string.
588[clinic start generated code]*/
589
Larry Hastings16c51912014-01-07 11:53:01 -0800590static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300591_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
592 Py_ssize_t pos, Py_ssize_t endpos,
593 PyObject *pattern)
594/*[clinic end generated code: output=74b4b1da3bb2d84e input=3d079aa99979b81d]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800595{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000596 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100597 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300598 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000599
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200600 string = fix_string_param(string, pattern, "pattern");
601 if (!string)
602 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300603 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000604 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000606 state.ptr = state.start;
607
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000608 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
609
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300610 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000611
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000612 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300613 if (PyErr_Occurred()) {
614 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000615 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300616 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000617
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300618 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000619 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300620 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000621}
622
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300623/*[clinic input]
624_sre.SRE_Pattern.fullmatch
625
626 string: object = NULL
627 pos: Py_ssize_t = 0
628 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
629 *
630 pattern: object = NULL
631
632Matches against all of the string
633[clinic start generated code]*/
634
635static PyObject *
636_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
637 Py_ssize_t pos, Py_ssize_t endpos,
638 PyObject *pattern)
639/*[clinic end generated code: output=1c98bc5da744ea94 input=d4228606cc12580f]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200640{
641 SRE_STATE state;
642 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300643 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200644
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300645 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200646 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200647 return NULL;
648
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300649 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200650 return NULL;
651
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200652 state.ptr = state.start;
653
654 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
655
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300656 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200657
658 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300659 if (PyErr_Occurred()) {
660 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200661 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300662 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200663
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300664 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200665 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300666 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200667}
668
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300669/*[clinic input]
670_sre.SRE_Pattern.search
671
672 string: object = NULL
673 pos: Py_ssize_t = 0
674 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
675 *
676 pattern: object = NULL
677
678Scan through string looking for a match, and return a corresponding match object instance.
679
680Return None if no position in the string matches.
681[clinic start generated code]*/
682
683static PyObject *
684_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
685 Py_ssize_t pos, Py_ssize_t endpos,
686 PyObject *pattern)
687/*[clinic end generated code: output=3839394a18e5ea4f input=dab42720f4be3a4b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000688{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000689 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100690 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300691 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000692
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300693 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200694 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000696
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300697 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000698 return NULL;
699
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000700 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
701
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300702 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000703
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000704 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
705
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300706 if (PyErr_Occurred()) {
707 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000708 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300709 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000710
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300711 match = pattern_new_match(self, &state, status);
712 state_fini(&state);
713 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000714}
715
716static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000717call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000718{
719 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000720 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721 PyObject* func;
722 PyObject* result;
723
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000724 if (!args)
725 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000726 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000727 if (!name)
728 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000729 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000730 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000731 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000732 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000733 func = PyObject_GetAttrString(mod, function);
734 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000735 if (!func)
736 return NULL;
737 result = PyObject_CallObject(func, args);
738 Py_DECREF(func);
739 Py_DECREF(args);
740 return result;
741}
742
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
   Returns 1 after swapping in the copy (the previous reference is
   released), or 0 if the copy could not be made, in which case *object
   is left as it was. */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* call() consumes the tuple and copes with it being NULL */
    PyObject* call_args = PyTuple_Pack(2, *object, memo);
    PyObject* duplicate = call("copy", "deepcopy", call_args);

    if (duplicate == NULL)
        return 0;

    Py_DECREF(*object);
    *object = duplicate;

    return 1; /* success */
}
#endif
762
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300763/*[clinic input]
764_sre.SRE_Pattern.findall
765
766 string: object = NULL
767 pos: Py_ssize_t = 0
768 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
769 *
770 source: object = NULL
771
772Return a list of all non-overlapping matches of pattern in string.
773[clinic start generated code]*/
774
775static PyObject *
776_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
777 Py_ssize_t pos, Py_ssize_t endpos,
778 PyObject *source)
779/*[clinic end generated code: output=51295498b300639d input=df688355c056b9de]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000780{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000781 SRE_STATE state;
782 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100783 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000784 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000785
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300786 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200787 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000788 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000789
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300790 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000791 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000792
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000793 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000794 if (!list) {
795 state_fini(&state);
796 return NULL;
797 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000798
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000799 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000800
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000801 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000802
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000803 state_reset(&state);
804
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000805 state.ptr = state.start;
806
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300807 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300808 if (PyErr_Occurred())
809 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000810
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000811 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000812 if (status == 0)
813 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000814 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000815 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000816 }
Tim Peters3d563502006-01-21 02:47:53 +0000817
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000818 /* don't bother to build a match object */
819 switch (self->groups) {
820 case 0:
821 b = STATE_OFFSET(&state, state.start);
822 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300823 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300824 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000825 if (!item)
826 goto error;
827 break;
828 case 1:
829 item = state_getslice(&state, 1, string, 1);
830 if (!item)
831 goto error;
832 break;
833 default:
834 item = PyTuple_New(self->groups);
835 if (!item)
836 goto error;
837 for (i = 0; i < self->groups; i++) {
838 PyObject* o = state_getslice(&state, i+1, string, 1);
839 if (!o) {
840 Py_DECREF(item);
841 goto error;
842 }
843 PyTuple_SET_ITEM(item, i, o);
844 }
845 break;
846 }
847
848 status = PyList_Append(list, item);
849 Py_DECREF(item);
850 if (status < 0)
851 goto error;
852
853 if (state.ptr == state.start)
854 state.start = (void*) ((char*) state.ptr + state.charsize);
855 else
856 state.start = state.ptr;
857
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000858 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000859
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000860 state_fini(&state);
861 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000862
863error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000864 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000865 state_fini(&state);
866 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000867
Guido van Rossumb700df92000-03-31 14:59:30 +0000868}
869
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300870/*[clinic input]
871_sre.SRE_Pattern.finditer
872
873 string: object
874 pos: Py_ssize_t = 0
875 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
876
877Return an iterator over all non-overlapping matches for the RE pattern in string.
878
879For each match, the iterator returns a match object.
880[clinic start generated code]*/
881
882static PyObject *
883_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
884 Py_ssize_t pos, Py_ssize_t endpos)
885/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000886{
887 PyObject* scanner;
888 PyObject* search;
889 PyObject* iterator;
890
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300891 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000892 if (!scanner)
893 return NULL;
894
895 search = PyObject_GetAttrString(scanner, "search");
896 Py_DECREF(scanner);
897 if (!search)
898 return NULL;
899
900 iterator = PyCallIter_New(search, Py_None);
901 Py_DECREF(search);
902
903 return iterator;
904}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000905
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300906/*[clinic input]
907_sre.SRE_Pattern.scanner
908
909 string: object
910 pos: Py_ssize_t = 0
911 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
912
913[clinic start generated code]*/
914
915static PyObject *
916_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
917 Py_ssize_t pos, Py_ssize_t endpos)
918/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
919{
920 return pattern_scanner(self, string, pos, endpos);
921}
922
923/*[clinic input]
924_sre.SRE_Pattern.split
925
926 string: object = NULL
927 maxsplit: Py_ssize_t = 0
928 *
929 source: object = NULL
930
931Split string by the occurrences of pattern.
932[clinic start generated code]*/
933
934static PyObject *
935_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
936 Py_ssize_t maxsplit, PyObject *source)
937/*[clinic end generated code: output=20bac2ff55b9f84c input=41e0b2e35e599d7b]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000938{
939 SRE_STATE state;
940 PyObject* list;
941 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100942 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000943 Py_ssize_t n;
944 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000945 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000946
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300947 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200948 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000949 return NULL;
950
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200951 assert(self->codesize != 0);
952 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
953 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
954 PyErr_SetString(PyExc_ValueError,
955 "split() requires a non-empty pattern match.");
956 return NULL;
957 }
958 if (PyErr_WarnEx(PyExc_FutureWarning,
959 "split() requires a non-empty pattern match.",
960 1) < 0)
961 return NULL;
962 }
963
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300964 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000965 return NULL;
966
967 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000968 if (!list) {
969 state_fini(&state);
970 return NULL;
971 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000972
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000973 n = 0;
974 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000975
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000976 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000977
978 state_reset(&state);
979
980 state.ptr = state.start;
981
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300982 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300983 if (PyErr_Occurred())
984 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000985
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000986 if (status <= 0) {
987 if (status == 0)
988 break;
989 pattern_error(status);
990 goto error;
991 }
Tim Peters3d563502006-01-21 02:47:53 +0000992
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000993 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300994 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000995 break;
996 /* skip one character */
997 state.start = (void*) ((char*) state.ptr + state.charsize);
998 continue;
999 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001000
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001001 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001002 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001003 string, STATE_OFFSET(&state, last),
1004 STATE_OFFSET(&state, state.start)
1005 );
1006 if (!item)
1007 goto error;
1008 status = PyList_Append(list, item);
1009 Py_DECREF(item);
1010 if (status < 0)
1011 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001012
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001013 /* add groups (if any) */
1014 for (i = 0; i < self->groups; i++) {
1015 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001016 if (!item)
1017 goto error;
1018 status = PyList_Append(list, item);
1019 Py_DECREF(item);
1020 if (status < 0)
1021 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001022 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001023
1024 n = n + 1;
1025
1026 last = state.start = state.ptr;
1027
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001028 }
1029
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001030 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001031 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001032 string, STATE_OFFSET(&state, last), state.endpos
1033 );
1034 if (!item)
1035 goto error;
1036 status = PyList_Append(list, item);
1037 Py_DECREF(item);
1038 if (status < 0)
1039 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001040
1041 state_fini(&state);
1042 return list;
1043
1044error:
1045 Py_DECREF(list);
1046 state_fini(&state);
1047 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001048
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001049}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001050
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001051static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001052pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001053 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001054{
1055 SRE_STATE state;
1056 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001057 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001058 PyObject* item;
1059 PyObject* filter;
1060 PyObject* args;
1061 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001062 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001063 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001064 Py_ssize_t n;
1065 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001066 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001067 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001068 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001069
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001070 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001071 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001072 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001073 Py_INCREF(filter);
1074 filter_is_callable = 1;
1075 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001076 /* if not callable, check if it's a literal string */
1077 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001078 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001079 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001081 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001082 if (charsize == 1)
1083 literal = memchr(ptr, '\\', n) == NULL;
1084 else
1085 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001086 } else {
1087 PyErr_Clear();
1088 literal = 0;
1089 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001090 if (view.buf)
1091 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001092 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001093 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001094 Py_INCREF(filter);
1095 filter_is_callable = 0;
1096 } else {
1097 /* not a literal; hand it over to the template compiler */
1098 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001099 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001100 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001101 );
1102 if (!filter)
1103 return NULL;
1104 filter_is_callable = PyCallable_Check(filter);
1105 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001106 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001107
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001108 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001109 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001110 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001111 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001112
1113 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001114 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001115 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001116 state_fini(&state);
1117 return NULL;
1118 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001119
1120 n = i = 0;
1121
1122 while (!count || n < count) {
1123
1124 state_reset(&state);
1125
1126 state.ptr = state.start;
1127
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001128 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001129 if (PyErr_Occurred())
1130 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001131
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001132 if (status <= 0) {
1133 if (status == 0)
1134 break;
1135 pattern_error(status);
1136 goto error;
1137 }
Tim Peters3d563502006-01-21 02:47:53 +00001138
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001139 b = STATE_OFFSET(&state, state.start);
1140 e = STATE_OFFSET(&state, state.ptr);
1141
1142 if (i < b) {
1143 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001144 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001145 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001146 if (!item)
1147 goto error;
1148 status = PyList_Append(list, item);
1149 Py_DECREF(item);
1150 if (status < 0)
1151 goto error;
1152
1153 } else if (i == b && i == e && n > 0)
1154 /* ignore empty match on latest position */
1155 goto next;
1156
1157 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001158 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001159 match = pattern_new_match(self, &state, 1);
1160 if (!match)
1161 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001162 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001163 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001164 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001165 goto error;
1166 }
1167 item = PyObject_CallObject(filter, args);
1168 Py_DECREF(args);
1169 Py_DECREF(match);
1170 if (!item)
1171 goto error;
1172 } else {
1173 /* filter is literal string */
1174 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001175 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001176 }
1177
1178 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001179 if (item != Py_None) {
1180 status = PyList_Append(list, item);
1181 Py_DECREF(item);
1182 if (status < 0)
1183 goto error;
1184 }
Tim Peters3d563502006-01-21 02:47:53 +00001185
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001186 i = e;
1187 n = n + 1;
1188
1189next:
1190 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001191 if (state.ptr == state.end)
1192 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001193 if (state.ptr == state.start)
1194 state.start = (void*) ((char*) state.ptr + state.charsize);
1195 else
1196 state.start = state.ptr;
1197
1198 }
1199
1200 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001201 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001202 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001203 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001204 if (!item)
1205 goto error;
1206 status = PyList_Append(list, item);
1207 Py_DECREF(item);
1208 if (status < 0)
1209 goto error;
1210 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001211
1212 state_fini(&state);
1213
Guido van Rossum4e173842001-12-07 04:25:10 +00001214 Py_DECREF(filter);
1215
Fredrik Lundhdac58492001-10-21 21:48:30 +00001216 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001217 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001218 if (!joiner) {
1219 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001220 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001221 }
1222 if (PyList_GET_SIZE(list) == 0) {
1223 Py_DECREF(list);
1224 item = joiner;
1225 }
1226 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001227 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001228 item = _PyBytes_Join(joiner, list);
1229 else
1230 item = PyUnicode_Join(joiner, list);
1231 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001232 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001233 if (!item)
1234 return NULL;
1235 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001236
1237 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001238 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001239
1240 return item;
1241
1242error:
1243 Py_DECREF(list);
1244 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001245 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001246 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001247
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001248}
1249
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001250/*[clinic input]
1251_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001252
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001253 repl: object
1254 string: object
1255 count: Py_ssize_t = 0
1256
1257Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1258[clinic start generated code]*/
1259
1260static PyObject *
1261_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1262 PyObject *string, Py_ssize_t count)
1263/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1264{
1265 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001266}
1267
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001268/*[clinic input]
1269_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001270
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001271 repl: object
1272 string: object
1273 count: Py_ssize_t = 0
1274
1275Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1276[clinic start generated code]*/
1277
1278static PyObject *
1279_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1280 PyObject *string, Py_ssize_t count)
1281/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1282{
1283 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001284}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001285
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001286/*[clinic input]
1287_sre.SRE_Pattern.__copy__
1288
1289[clinic start generated code]*/
1290
1291static PyObject *
1292_sre_SRE_Pattern___copy___impl(PatternObject *self)
1293/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001294{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001295#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001296 PatternObject* copy;
1297 int offset;
1298
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001299 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1300 if (!copy)
1301 return NULL;
1302
1303 offset = offsetof(PatternObject, groups);
1304
1305 Py_XINCREF(self->groupindex);
1306 Py_XINCREF(self->indexgroup);
1307 Py_XINCREF(self->pattern);
1308
1309 memcpy((char*) copy + offset, (char*) self + offset,
1310 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001311 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001312
1313 return (PyObject*) copy;
1314#else
1315 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1316 return NULL;
1317#endif
1318}
1319
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001320/*[clinic input]
1321_sre.SRE_Pattern.__deepcopy__
1322
1323 memo: object
1324
1325[clinic start generated code]*/
1326
1327static PyObject *
1328_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1329/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001330{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001331#ifdef USE_BUILTIN_COPY
1332 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001333
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001334 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001335 if (!copy)
1336 return NULL;
1337
1338 if (!deepcopy(&copy->groupindex, memo) ||
1339 !deepcopy(&copy->indexgroup, memo) ||
1340 !deepcopy(&copy->pattern, memo)) {
1341 Py_DECREF(copy);
1342 return NULL;
1343 }
1344
1345#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001346 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1347 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001348#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001349}
1350
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001351static PyObject *
1352pattern_repr(PatternObject *obj)
1353{
1354 static const struct {
1355 const char *name;
1356 int value;
1357 } flag_names[] = {
1358 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1359 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1360 {"re.LOCALE", SRE_FLAG_LOCALE},
1361 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1362 {"re.DOTALL", SRE_FLAG_DOTALL},
1363 {"re.UNICODE", SRE_FLAG_UNICODE},
1364 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1365 {"re.DEBUG", SRE_FLAG_DEBUG},
1366 {"re.ASCII", SRE_FLAG_ASCII},
1367 };
1368 PyObject *result = NULL;
1369 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001370 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001371 int flags = obj->flags;
1372
1373 /* Omit re.UNICODE for valid string patterns. */
1374 if (obj->isbytes == 0 &&
1375 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1376 SRE_FLAG_UNICODE)
1377 flags &= ~SRE_FLAG_UNICODE;
1378
1379 flag_items = PyList_New(0);
1380 if (!flag_items)
1381 return NULL;
1382
1383 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1384 if (flags & flag_names[i].value) {
1385 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1386 if (!item)
1387 goto done;
1388
1389 if (PyList_Append(flag_items, item) < 0) {
1390 Py_DECREF(item);
1391 goto done;
1392 }
1393 Py_DECREF(item);
1394 flags &= ~flag_names[i].value;
1395 }
1396 }
1397 if (flags) {
1398 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1399 if (!item)
1400 goto done;
1401
1402 if (PyList_Append(flag_items, item) < 0) {
1403 Py_DECREF(item);
1404 goto done;
1405 }
1406 Py_DECREF(item);
1407 }
1408
1409 if (PyList_Size(flag_items) > 0) {
1410 PyObject *flags_result;
1411 PyObject *sep = PyUnicode_FromString("|");
1412 if (!sep)
1413 goto done;
1414 flags_result = PyUnicode_Join(sep, flag_items);
1415 Py_DECREF(sep);
1416 if (!flags_result)
1417 goto done;
1418 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1419 obj->pattern, flags_result);
1420 Py_DECREF(flags_result);
1421 }
1422 else {
1423 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1424 }
1425
1426done:
1427 Py_DECREF(flag_items);
1428 return result;
1429}
1430
Raymond Hettinger94478742004-09-24 04:31:19 +00001431PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1432
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001433/* PatternObject's 'groupindex' method. */
1434static PyObject *
1435pattern_groupindex(PatternObject *self)
1436{
1437 return PyDictProxy_New(self->groupindex);
1438}
1439
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001440static int _validate(PatternObject *self); /* Forward */
1441
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001442/*[clinic input]
1443_sre.compile
1444
1445 pattern: object
1446 flags: int
1447 code: object(subclass_of='&PyList_Type')
1448 groups: Py_ssize_t
1449 groupindex: object
1450 indexgroup: object
1451
1452[clinic start generated code]*/
1453
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001454static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001455_sre_compile_impl(PyModuleDef *module, PyObject *pattern, int flags,
1456 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1457 PyObject *indexgroup)
1458/*[clinic end generated code: output=3004b293730bf309 input=7d059ec8ae1edb85]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001459{
1460 /* "compile" pattern descriptor to pattern object */
1461
1462 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001463 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001464
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001465 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001466 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001467 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1468 if (!self)
1469 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001470 self->weakreflist = NULL;
1471 self->pattern = NULL;
1472 self->groupindex = NULL;
1473 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001474
1475 self->codesize = n;
1476
1477 for (i = 0; i < n; i++) {
1478 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001479 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001480 self->code[i] = (SRE_CODE) value;
1481 if ((unsigned long) self->code[i] != value) {
1482 PyErr_SetString(PyExc_OverflowError,
1483 "regular expression code size limit exceeded");
1484 break;
1485 }
1486 }
1487
1488 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001489 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001490 return NULL;
1491 }
1492
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001493 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001494 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001495 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001496 else {
1497 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001498 int charsize;
1499 Py_buffer view;
1500 view.buf = NULL;
1501 if (!getstring(pattern, &p_length, &self->isbytes,
1502 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001503 Py_DECREF(self);
1504 return NULL;
1505 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001506 if (view.buf)
1507 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001508 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001509
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001510 Py_INCREF(pattern);
1511 self->pattern = pattern;
1512
1513 self->flags = flags;
1514
1515 self->groups = groups;
1516
1517 Py_XINCREF(groupindex);
1518 self->groupindex = groupindex;
1519
1520 Py_XINCREF(indexgroup);
1521 self->indexgroup = indexgroup;
1522
1523 self->weakreflist = NULL;
1524
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001525 if (!_validate(self)) {
1526 Py_DECREF(self);
1527 return NULL;
1528 }
1529
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001530 return (PyObject*) self;
1531}
1532
Guido van Rossumb700df92000-03-31 14:59:30 +00001533/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001534/* Code validation */
1535
1536/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
1538 for conformance with the code patterns generated there.
1539
1540 The nice thing about the generated code is that it is position-independent:
1541 all jumps are relative jumps forward. Also, jumps don't cross each other:
1542 the target of a later jump is always earlier than the target of an earlier
1543 jump. IOW, this is okay:
1544
1545 J---------J-------T--------T
1546 \ \_____/ /
1547 \______________________/
1548
1549 but this is not:
1550
1551 J---------J-------T--------T
1552 \_________\_____/ /
1553 \____________/
1554
Serhiy Storchakaefa5a392013-10-27 08:04:58 +02001555 It also helps that SRE_CODE is always an unsigned type.
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001556*/
1557
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf() argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the enclosing validator function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros rely on locals of the enclosing function: `code` (read
   cursor), `end` (one past the last readable word) and `op` / `arg` /
   `skip` (destination of the value read).  Each one FAILs when the cursor
   has already reached `end`.  Pointers handed to "%p" are cast to void *,
   as the C standard requires for that conversion. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and check that (skip - adj) does not reach past `end`,
   measured from the position of the skip word itself.  The subtraction is
   unsigned, so skip < adj wraps to a huge value and is rejected by the
   same comparison. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, (void *)(code+skip))); \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001598
1599static int
1600_validate_charset(SRE_CODE *code, SRE_CODE *end)
1601{
1602 /* Some variables are manipulated by the macros above */
1603 SRE_CODE op;
1604 SRE_CODE arg;
1605 SRE_CODE offset;
1606 int i;
1607
1608 while (code < end) {
1609 GET_OP;
1610 switch (op) {
1611
1612 case SRE_OP_NEGATE:
1613 break;
1614
1615 case SRE_OP_LITERAL:
1616 GET_ARG;
1617 break;
1618
1619 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001620 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001621 GET_ARG;
1622 GET_ARG;
1623 break;
1624
1625 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001626 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001627 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001628 FAIL;
1629 code += offset;
1630 break;
1631
1632 case SRE_OP_BIGCHARSET:
1633 GET_ARG; /* Number of blocks */
1634 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001635 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001636 FAIL;
1637 /* Make sure that each byte points to a valid block */
1638 for (i = 0; i < 256; i++) {
1639 if (((unsigned char *)code)[i] >= arg)
1640 FAIL;
1641 }
1642 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001643 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001644 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001645 FAIL;
1646 code += offset;
1647 break;
1648
1649 case SRE_OP_CATEGORY:
1650 GET_ARG;
1651 switch (arg) {
1652 case SRE_CATEGORY_DIGIT:
1653 case SRE_CATEGORY_NOT_DIGIT:
1654 case SRE_CATEGORY_SPACE:
1655 case SRE_CATEGORY_NOT_SPACE:
1656 case SRE_CATEGORY_WORD:
1657 case SRE_CATEGORY_NOT_WORD:
1658 case SRE_CATEGORY_LINEBREAK:
1659 case SRE_CATEGORY_NOT_LINEBREAK:
1660 case SRE_CATEGORY_LOC_WORD:
1661 case SRE_CATEGORY_LOC_NOT_WORD:
1662 case SRE_CATEGORY_UNI_DIGIT:
1663 case SRE_CATEGORY_UNI_NOT_DIGIT:
1664 case SRE_CATEGORY_UNI_SPACE:
1665 case SRE_CATEGORY_UNI_NOT_SPACE:
1666 case SRE_CATEGORY_UNI_WORD:
1667 case SRE_CATEGORY_UNI_NOT_WORD:
1668 case SRE_CATEGORY_UNI_LINEBREAK:
1669 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1670 break;
1671 default:
1672 FAIL;
1673 }
1674 break;
1675
1676 default:
1677 FAIL;
1678
1679 }
1680 }
1681
1682 return 1;
1683}
1684
1685static int
1686_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1687{
1688 /* Some variables are manipulated by the macros above */
1689 SRE_CODE op;
1690 SRE_CODE arg;
1691 SRE_CODE skip;
1692
1693 VTRACE(("code=%p, end=%p\n", code, end));
1694
1695 if (code > end)
1696 FAIL;
1697
1698 while (code < end) {
1699 GET_OP;
1700 switch (op) {
1701
1702 case SRE_OP_MARK:
1703 /* We don't check whether marks are properly nested; the
1704 sre_match() code is robust even if they don't, and the worst
1705 you can get is nonsensical match results. */
1706 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001707 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001708 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1709 FAIL;
1710 }
1711 break;
1712
1713 case SRE_OP_LITERAL:
1714 case SRE_OP_NOT_LITERAL:
1715 case SRE_OP_LITERAL_IGNORE:
1716 case SRE_OP_NOT_LITERAL_IGNORE:
1717 GET_ARG;
1718 /* The arg is just a character, nothing to check */
1719 break;
1720
1721 case SRE_OP_SUCCESS:
1722 case SRE_OP_FAILURE:
1723 /* Nothing to check; these normally end the matching process */
1724 break;
1725
1726 case SRE_OP_AT:
1727 GET_ARG;
1728 switch (arg) {
1729 case SRE_AT_BEGINNING:
1730 case SRE_AT_BEGINNING_STRING:
1731 case SRE_AT_BEGINNING_LINE:
1732 case SRE_AT_END:
1733 case SRE_AT_END_LINE:
1734 case SRE_AT_END_STRING:
1735 case SRE_AT_BOUNDARY:
1736 case SRE_AT_NON_BOUNDARY:
1737 case SRE_AT_LOC_BOUNDARY:
1738 case SRE_AT_LOC_NON_BOUNDARY:
1739 case SRE_AT_UNI_BOUNDARY:
1740 case SRE_AT_UNI_NON_BOUNDARY:
1741 break;
1742 default:
1743 FAIL;
1744 }
1745 break;
1746
1747 case SRE_OP_ANY:
1748 case SRE_OP_ANY_ALL:
1749 /* These have no operands */
1750 break;
1751
1752 case SRE_OP_IN:
1753 case SRE_OP_IN_IGNORE:
1754 GET_SKIP;
1755 /* Stop 1 before the end; we check the FAILURE below */
1756 if (!_validate_charset(code, code+skip-2))
1757 FAIL;
1758 if (code[skip-2] != SRE_OP_FAILURE)
1759 FAIL;
1760 code += skip-1;
1761 break;
1762
1763 case SRE_OP_INFO:
1764 {
1765 /* A minimal info field is
1766 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1767 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1768 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001769 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001770 SRE_CODE *newcode;
1771 GET_SKIP;
1772 newcode = code+skip-1;
1773 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001774 GET_ARG;
1775 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001776 /* Check that only valid flags are present */
1777 if ((flags & ~(SRE_INFO_PREFIX |
1778 SRE_INFO_LITERAL |
1779 SRE_INFO_CHARSET)) != 0)
1780 FAIL;
1781 /* PREFIX and CHARSET are mutually exclusive */
1782 if ((flags & SRE_INFO_PREFIX) &&
1783 (flags & SRE_INFO_CHARSET))
1784 FAIL;
1785 /* LITERAL implies PREFIX */
1786 if ((flags & SRE_INFO_LITERAL) &&
1787 !(flags & SRE_INFO_PREFIX))
1788 FAIL;
1789 /* Validate the prefix */
1790 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001791 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001792 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001793 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001794 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001795 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001796 FAIL;
1797 code += prefix_len;
1798 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001799 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001800 FAIL;
1801 /* Each overlap value should be < prefix_len */
1802 for (i = 0; i < prefix_len; i++) {
1803 if (code[i] >= prefix_len)
1804 FAIL;
1805 }
1806 code += prefix_len;
1807 }
1808 /* Validate the charset */
1809 if (flags & SRE_INFO_CHARSET) {
1810 if (!_validate_charset(code, newcode-1))
1811 FAIL;
1812 if (newcode[-1] != SRE_OP_FAILURE)
1813 FAIL;
1814 code = newcode;
1815 }
1816 else if (code != newcode) {
1817 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1818 FAIL;
1819 }
1820 }
1821 break;
1822
1823 case SRE_OP_BRANCH:
1824 {
1825 SRE_CODE *target = NULL;
1826 for (;;) {
1827 GET_SKIP;
1828 if (skip == 0)
1829 break;
1830 /* Stop 2 before the end; we check the JUMP below */
1831 if (!_validate_inner(code, code+skip-3, groups))
1832 FAIL;
1833 code += skip-3;
1834 /* Check that it ends with a JUMP, and that each JUMP
1835 has the same target */
1836 GET_OP;
1837 if (op != SRE_OP_JUMP)
1838 FAIL;
1839 GET_SKIP;
1840 if (target == NULL)
1841 target = code+skip-1;
1842 else if (code+skip-1 != target)
1843 FAIL;
1844 }
1845 }
1846 break;
1847
1848 case SRE_OP_REPEAT_ONE:
1849 case SRE_OP_MIN_REPEAT_ONE:
1850 {
1851 SRE_CODE min, max;
1852 GET_SKIP;
1853 GET_ARG; min = arg;
1854 GET_ARG; max = arg;
1855 if (min > max)
1856 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001857 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001858 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001859 if (!_validate_inner(code, code+skip-4, groups))
1860 FAIL;
1861 code += skip-4;
1862 GET_OP;
1863 if (op != SRE_OP_SUCCESS)
1864 FAIL;
1865 }
1866 break;
1867
1868 case SRE_OP_REPEAT:
1869 {
1870 SRE_CODE min, max;
1871 GET_SKIP;
1872 GET_ARG; min = arg;
1873 GET_ARG; max = arg;
1874 if (min > max)
1875 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001876 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001877 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001878 if (!_validate_inner(code, code+skip-3, groups))
1879 FAIL;
1880 code += skip-3;
1881 GET_OP;
1882 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1883 FAIL;
1884 }
1885 break;
1886
1887 case SRE_OP_GROUPREF:
1888 case SRE_OP_GROUPREF_IGNORE:
1889 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001890 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001891 FAIL;
1892 break;
1893
1894 case SRE_OP_GROUPREF_EXISTS:
1895 /* The regex syntax for this is: '(?(group)then|else)', where
1896 'group' is either an integer group number or a group name,
1897 'then' and 'else' are sub-regexes, and 'else' is optional. */
1898 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001899 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001900 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001901 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001902 code--; /* The skip is relative to the first arg! */
1903 /* There are two possibilities here: if there is both a 'then'
1904 part and an 'else' part, the generated code looks like:
1905
1906 GROUPREF_EXISTS
1907 <group>
1908 <skipyes>
1909 ...then part...
1910 JUMP
1911 <skipno>
1912 (<skipyes> jumps here)
1913 ...else part...
1914 (<skipno> jumps here)
1915
1916 If there is only a 'then' part, it looks like:
1917
1918 GROUPREF_EXISTS
1919 <group>
1920 <skip>
1921 ...then part...
1922 (<skip> jumps here)
1923
1924 There is no direct way to decide which it is, and we don't want
1925 to allow arbitrary jumps anywhere in the code; so we just look
1926 for a JUMP opcode preceding our skip target.
1927 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001928 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001929 code[skip-3] == SRE_OP_JUMP)
1930 {
1931 VTRACE(("both then and else parts present\n"));
1932 if (!_validate_inner(code+1, code+skip-3, groups))
1933 FAIL;
1934 code += skip-2; /* Position after JUMP, at <skipno> */
1935 GET_SKIP;
1936 if (!_validate_inner(code, code+skip-1, groups))
1937 FAIL;
1938 code += skip-1;
1939 }
1940 else {
1941 VTRACE(("only a then part present\n"));
1942 if (!_validate_inner(code+1, code+skip-1, groups))
1943 FAIL;
1944 code += skip-1;
1945 }
1946 break;
1947
1948 case SRE_OP_ASSERT:
1949 case SRE_OP_ASSERT_NOT:
1950 GET_SKIP;
1951 GET_ARG; /* 0 for lookahead, width for lookbehind */
1952 code--; /* Back up over arg to simplify math below */
1953 if (arg & 0x80000000)
1954 FAIL; /* Width too large */
1955 /* Stop 1 before the end; we check the SUCCESS below */
1956 if (!_validate_inner(code+1, code+skip-2, groups))
1957 FAIL;
1958 code += skip-2;
1959 GET_OP;
1960 if (op != SRE_OP_SUCCESS)
1961 FAIL;
1962 break;
1963
1964 default:
1965 FAIL;
1966
1967 }
1968 }
1969
1970 VTRACE(("okay\n"));
1971 return 1;
1972}
1973
1974static int
1975_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1976{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001977 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1978 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001979 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001980 return _validate_inner(code, end-1, groups);
1981}
1982
1983static int
1984_validate(PatternObject *self)
1985{
1986 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1987 {
1988 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1989 return 0;
1990 }
1991 else
1992 VTRACE(("Success!\n"));
1993 return 1;
1994}
1995
1996/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001997/* match methods */
1998
1999static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002000match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002001{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002002 Py_XDECREF(self->regs);
2003 Py_XDECREF(self->string);
2004 Py_DECREF(self->pattern);
2005 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002006}
2007
2008static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002009match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002010{
Serhiy Storchaka25324972013-10-16 12:46:28 +03002011 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002012 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03002013 Py_buffer view;
2014 PyObject *result;
2015 void* ptr;
2016
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002017 if (index < 0 || index >= self->groups) {
2018 /* raise IndexError if we were given a bad group number */
2019 PyErr_SetString(
2020 PyExc_IndexError,
2021 "no such group"
2022 );
2023 return NULL;
2024 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002025
Fredrik Lundh6f013982000-07-03 18:44:21 +00002026 index *= 2;
2027
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002028 if (self->string == Py_None || self->mark[index] < 0) {
2029 /* return default value if the string or group is undefined */
2030 Py_INCREF(def);
2031 return def;
2032 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002033
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002034 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03002035 if (ptr == NULL)
2036 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002037 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03002038 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002039 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002040 PyBuffer_Release(&view);
2041 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002042}
2043
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002044static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002045match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002046{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002047 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002048
Guido van Rossumddefaf32007-01-14 03:31:43 +00002049 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002050 /* Default value */
2051 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002052
Christian Heimes217cfd12007-12-02 14:31:20 +00002053 if (PyLong_Check(index))
2054 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002055
Fredrik Lundh6f013982000-07-03 18:44:21 +00002056 i = -1;
2057
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002058 if (self->pattern->groupindex) {
2059 index = PyObject_GetItem(self->pattern->groupindex, index);
2060 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002061 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002062 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002063 Py_DECREF(index);
2064 } else
2065 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002066 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002067
2068 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002069}
2070
2071static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002072match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002073{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002074 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002075}
2076
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002077/*[clinic input]
2078_sre.SRE_Match.expand
2079
2080 template: object
2081
2082Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2083[clinic start generated code]*/
2084
2085static PyObject *
2086_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2087/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002088{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002089 /* delegate to Python code */
2090 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002091 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002092 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002093 );
2094}
2095
2096static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002097match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002098{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002099 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002100 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002101
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002102 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002103
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002104 switch (size) {
2105 case 0:
2106 result = match_getslice(self, Py_False, Py_None);
2107 break;
2108 case 1:
2109 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2110 break;
2111 default:
2112 /* fetch multiple items */
2113 result = PyTuple_New(size);
2114 if (!result)
2115 return NULL;
2116 for (i = 0; i < size; i++) {
2117 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002118 self, PyTuple_GET_ITEM(args, i), Py_None
2119 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002120 if (!item) {
2121 Py_DECREF(result);
2122 return NULL;
2123 }
2124 PyTuple_SET_ITEM(result, i, item);
2125 }
2126 break;
2127 }
2128 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002129}
2130
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002131/*[clinic input]
2132_sre.SRE_Match.groups
2133
2134 default: object = None
2135 Is used for groups that did not participate in the match.
2136
2137Return a tuple containing all the subgroups of the match, from 1.
2138[clinic start generated code]*/
2139
2140static PyObject *
2141_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2142/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002143{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002144 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002145 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002146
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002147 result = PyTuple_New(self->groups-1);
2148 if (!result)
2149 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002150
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002151 for (index = 1; index < self->groups; index++) {
2152 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002153 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002154 if (!item) {
2155 Py_DECREF(result);
2156 return NULL;
2157 }
2158 PyTuple_SET_ITEM(result, index-1, item);
2159 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002160
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002161 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002162}
2163
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002164/*[clinic input]
2165_sre.SRE_Match.groupdict
2166
2167 default: object = None
2168 Is used for groups that did not participate in the match.
2169
2170Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2171[clinic start generated code]*/
2172
2173static PyObject *
2174_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2175/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002176{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002177 PyObject* result;
2178 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002179 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002180
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002181 result = PyDict_New();
2182 if (!result || !self->pattern->groupindex)
2183 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002184
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002185 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002186 if (!keys)
2187 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002188
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002189 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002190 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002191 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002192 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002193 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002194 if (!key)
2195 goto failed;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002196 value = match_getslice(self, key, default_value);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002197 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002198 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002199 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002200 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002201 status = PyDict_SetItem(result, key, value);
2202 Py_DECREF(value);
2203 if (status < 0)
2204 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002205 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002206
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002208
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002209 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002210
2211failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002212 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002213 Py_DECREF(result);
2214 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002215}
2216
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002217/*[clinic input]
2218_sre.SRE_Match.start -> Py_ssize_t
2219
2220 group: object(c_default="NULL") = 0
2221 /
2222
2223Return index of the start of the substring matched by group.
2224[clinic start generated code]*/
2225
2226static Py_ssize_t
2227_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2228/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002229{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002230 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002231
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002232 if (index < 0 || index >= self->groups) {
2233 PyErr_SetString(
2234 PyExc_IndexError,
2235 "no such group"
2236 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002237 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002238 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002239
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002240 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002241 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002242}
2243
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002244/*[clinic input]
2245_sre.SRE_Match.end -> Py_ssize_t
2246
2247 group: object(c_default="NULL") = 0
2248 /
2249
2250Return index of the end of the substring matched by group.
2251[clinic start generated code]*/
2252
2253static Py_ssize_t
2254_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2255/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002256{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002257 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002258
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002259 if (index < 0 || index >= self->groups) {
2260 PyErr_SetString(
2261 PyExc_IndexError,
2262 "no such group"
2263 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002264 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002265 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002266
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002267 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002268 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002269}
2270
2271LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002272_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002273{
2274 PyObject* pair;
2275 PyObject* item;
2276
2277 pair = PyTuple_New(2);
2278 if (!pair)
2279 return NULL;
2280
Christian Heimes217cfd12007-12-02 14:31:20 +00002281 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002282 if (!item)
2283 goto error;
2284 PyTuple_SET_ITEM(pair, 0, item);
2285
Christian Heimes217cfd12007-12-02 14:31:20 +00002286 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002287 if (!item)
2288 goto error;
2289 PyTuple_SET_ITEM(pair, 1, item);
2290
2291 return pair;
2292
2293 error:
2294 Py_DECREF(pair);
2295 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002296}
2297
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002298/*[clinic input]
2299_sre.SRE_Match.span
2300
2301 group: object(c_default="NULL") = 0
2302 /
2303
2304For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2305[clinic start generated code]*/
2306
2307static PyObject *
2308_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2309/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002310{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002311 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002312
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002313 if (index < 0 || index >= self->groups) {
2314 PyErr_SetString(
2315 PyExc_IndexError,
2316 "no such group"
2317 );
2318 return NULL;
2319 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002320
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002321 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002322 return _pair(self->mark[index*2], self->mark[index*2+1]);
2323}
2324
2325static PyObject*
2326match_regs(MatchObject* self)
2327{
2328 PyObject* regs;
2329 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002330 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002331
2332 regs = PyTuple_New(self->groups);
2333 if (!regs)
2334 return NULL;
2335
2336 for (index = 0; index < self->groups; index++) {
2337 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2338 if (!item) {
2339 Py_DECREF(regs);
2340 return NULL;
2341 }
2342 PyTuple_SET_ITEM(regs, index, item);
2343 }
2344
2345 Py_INCREF(regs);
2346 self->regs = regs;
2347
2348 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002349}
2350
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002351/*[clinic input]
2352_sre.SRE_Match.__copy__
2353
2354[clinic start generated code]*/
2355
2356static PyObject *
2357_sre_SRE_Match___copy___impl(MatchObject *self)
2358/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002359{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002360#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002361 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002362 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002363
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002364 slots = 2 * (self->pattern->groups+1);
2365
2366 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2367 if (!copy)
2368 return NULL;
2369
2370 /* this value a constant, but any compiler should be able to
2371 figure that out all by itself */
2372 offset = offsetof(MatchObject, string);
2373
2374 Py_XINCREF(self->pattern);
2375 Py_XINCREF(self->string);
2376 Py_XINCREF(self->regs);
2377
2378 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002379 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002380
2381 return (PyObject*) copy;
2382#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002383 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002384 return NULL;
2385#endif
2386}
2387
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002388/*[clinic input]
2389_sre.SRE_Match.__deepcopy__
2390
2391 memo: object
2392
2393[clinic start generated code]*/
2394
2395static PyObject *
2396_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2397/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002398{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002399#ifdef USE_BUILTIN_COPY
2400 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002401
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002402 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002403 if (!copy)
2404 return NULL;
2405
2406 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2407 !deepcopy(&copy->string, memo) ||
2408 !deepcopy(&copy->regs, memo)) {
2409 Py_DECREF(copy);
2410 return NULL;
2411 }
2412
2413#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002414 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2415 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002416#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002417}
2418
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002419PyDoc_STRVAR(match_doc,
2420"The result of re.match() and re.search().\n\
2421Match objects always have a boolean value of True.");
2422
2423PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002424"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002425 Return subgroup(s) of the match by indices or names.\n\
2426 For 0 returns the entire match.");
2427
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002428static PyObject *
2429match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002430{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002431 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002432 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002433 Py_INCREF(Py_None);
2434 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002435}
2436
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002437static PyObject *
2438match_lastgroup_get(MatchObject *self)
2439{
2440 if (self->pattern->indexgroup && self->lastindex >= 0) {
2441 PyObject* result = PySequence_GetItem(
2442 self->pattern->indexgroup, self->lastindex
2443 );
2444 if (result)
2445 return result;
2446 PyErr_Clear();
2447 }
2448 Py_INCREF(Py_None);
2449 return Py_None;
2450}
2451
2452static PyObject *
2453match_regs_get(MatchObject *self)
2454{
2455 if (self->regs) {
2456 Py_INCREF(self->regs);
2457 return self->regs;
2458 } else
2459 return match_regs(self);
2460}
2461
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002462static PyObject *
2463match_repr(MatchObject *self)
2464{
2465 PyObject *result;
2466 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2467 if (group0 == NULL)
2468 return NULL;
2469 result = PyUnicode_FromFormat(
2470 "<%s object; span=(%d, %d), match=%.50R>",
2471 Py_TYPE(self)->tp_name,
2472 self->mark[0], self->mark[1], group0);
2473 Py_DECREF(group0);
2474 return result;
2475}
2476
2477
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002478static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002479pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002480{
2481 /* create match object (from state object) */
2482
2483 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002484 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002485 char* base;
2486 int n;
2487
2488 if (status > 0) {
2489
2490 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002491 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002492 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2493 2*(pattern->groups+1));
2494 if (!match)
2495 return NULL;
2496
2497 Py_INCREF(pattern);
2498 match->pattern = pattern;
2499
2500 Py_INCREF(state->string);
2501 match->string = state->string;
2502
2503 match->regs = NULL;
2504 match->groups = pattern->groups+1;
2505
2506 /* fill in group slices */
2507
2508 base = (char*) state->beginning;
2509 n = state->charsize;
2510
2511 match->mark[0] = ((char*) state->start - base) / n;
2512 match->mark[1] = ((char*) state->ptr - base) / n;
2513
2514 for (i = j = 0; i < pattern->groups; i++, j+=2)
2515 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2516 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2517 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2518 } else
2519 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2520
2521 match->pos = state->pos;
2522 match->endpos = state->endpos;
2523
2524 match->lastindex = state->lastindex;
2525
2526 return (PyObject*) match;
2527
2528 } else if (status == 0) {
2529
2530 /* no match */
2531 Py_INCREF(Py_None);
2532 return Py_None;
2533
2534 }
2535
2536 /* internal error */
2537 pattern_error(status);
2538 return NULL;
2539}
2540
2541
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002542/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002543/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002544
2545static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002546scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002547{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002548 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002549 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002550 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002551}
2552
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002553/*[clinic input]
2554_sre.SRE_Scanner.match
2555
2556[clinic start generated code]*/
2557
2558static PyObject *
2559_sre_SRE_Scanner_match_impl(ScannerObject *self)
2560/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002561{
2562 SRE_STATE* state = &self->state;
2563 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002564 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002565
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002566 if (state->start == NULL)
2567 Py_RETURN_NONE;
2568
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002569 state_reset(state);
2570
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002571 state->ptr = state->start;
2572
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002573 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002574 if (PyErr_Occurred())
2575 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002576
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002577 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002578 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002579
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002580 if (status == 0)
2581 state->start = NULL;
2582 else if (state->ptr != state->start)
2583 state->start = state->ptr;
2584 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002585 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002586 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002587 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002588
2589 return match;
2590}
2591
2592
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002593/*[clinic input]
2594_sre.SRE_Scanner.search
2595
2596[clinic start generated code]*/
2597
2598static PyObject *
2599_sre_SRE_Scanner_search_impl(ScannerObject *self)
2600/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002601{
2602 SRE_STATE* state = &self->state;
2603 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002604 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002605
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002606 if (state->start == NULL)
2607 Py_RETURN_NONE;
2608
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002609 state_reset(state);
2610
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002611 state->ptr = state->start;
2612
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002613 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002614 if (PyErr_Occurred())
2615 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002616
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002617 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002618 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002619
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002620 if (status == 0)
2621 state->start = NULL;
2622 else if (state->ptr != state->start)
2623 state->start = state->ptr;
2624 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002625 state->start = (void*) ((char*) state->ptr + state->charsize);
2626 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002627 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002628
2629 return match;
2630}
2631
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002632static PyObject *
2633pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002634{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002635 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002636
2637 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002638 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2639 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002640 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002641 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002642
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002643 /* create search state object */
2644 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2645 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002646 return NULL;
2647 }
2648
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002649 Py_INCREF(self);
2650 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002651
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002652 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002653}
2654
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002655#include "clinic/_sre.c.h"
2656
2657static PyMethodDef pattern_methods[] = {
2658 _SRE_SRE_PATTERN_MATCH_METHODDEF
2659 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2660 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2661 _SRE_SRE_PATTERN_SUB_METHODDEF
2662 _SRE_SRE_PATTERN_SUBN_METHODDEF
2663 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2664 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2665 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2666 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2667 _SRE_SRE_PATTERN___COPY___METHODDEF
2668 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2669 {NULL, NULL}
2670};
2671
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002672static PyGetSetDef pattern_getset[] = {
2673 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2674 "A dictionary mapping group names to group numbers."},
2675 {NULL} /* Sentinel */
2676};
2677
2678#define PAT_OFF(x) offsetof(PatternObject, x)
2679static PyMemberDef pattern_members[] = {
2680 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2681 {"flags", T_INT, PAT_OFF(flags), READONLY},
2682 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2683 {NULL} /* Sentinel */
2684};
2685
2686static PyTypeObject Pattern_Type = {
2687 PyVarObject_HEAD_INIT(NULL, 0)
2688 "_" SRE_MODULE ".SRE_Pattern",
2689 sizeof(PatternObject), sizeof(SRE_CODE),
2690 (destructor)pattern_dealloc, /* tp_dealloc */
2691 0, /* tp_print */
2692 0, /* tp_getattr */
2693 0, /* tp_setattr */
2694 0, /* tp_reserved */
2695 (reprfunc)pattern_repr, /* tp_repr */
2696 0, /* tp_as_number */
2697 0, /* tp_as_sequence */
2698 0, /* tp_as_mapping */
2699 0, /* tp_hash */
2700 0, /* tp_call */
2701 0, /* tp_str */
2702 0, /* tp_getattro */
2703 0, /* tp_setattro */
2704 0, /* tp_as_buffer */
2705 Py_TPFLAGS_DEFAULT, /* tp_flags */
2706 pattern_doc, /* tp_doc */
2707 0, /* tp_traverse */
2708 0, /* tp_clear */
2709 0, /* tp_richcompare */
2710 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2711 0, /* tp_iter */
2712 0, /* tp_iternext */
2713 pattern_methods, /* tp_methods */
2714 pattern_members, /* tp_members */
2715 pattern_getset, /* tp_getset */
2716};
2717
2718
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002719static PyMethodDef match_methods[] = {
2720 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2721 _SRE_SRE_MATCH_START_METHODDEF
2722 _SRE_SRE_MATCH_END_METHODDEF
2723 _SRE_SRE_MATCH_SPAN_METHODDEF
2724 _SRE_SRE_MATCH_GROUPS_METHODDEF
2725 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2726 _SRE_SRE_MATCH_EXPAND_METHODDEF
2727 _SRE_SRE_MATCH___COPY___METHODDEF
2728 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2729 {NULL, NULL}
2730};
2731
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002732static PyGetSetDef match_getset[] = {
2733 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2734 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2735 {"regs", (getter)match_regs_get, (setter)NULL},
2736 {NULL}
2737};
2738
2739#define MATCH_OFF(x) offsetof(MatchObject, x)
2740static PyMemberDef match_members[] = {
2741 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2742 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2743 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2744 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2745 {NULL}
2746};
2747
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2750
2751static PyTypeObject Match_Type = {
2752 PyVarObject_HEAD_INIT(NULL,0)
2753 "_" SRE_MODULE ".SRE_Match",
2754 sizeof(MatchObject), sizeof(Py_ssize_t),
2755 (destructor)match_dealloc, /* tp_dealloc */
2756 0, /* tp_print */
2757 0, /* tp_getattr */
2758 0, /* tp_setattr */
2759 0, /* tp_reserved */
2760 (reprfunc)match_repr, /* tp_repr */
2761 0, /* tp_as_number */
2762 0, /* tp_as_sequence */
2763 0, /* tp_as_mapping */
2764 0, /* tp_hash */
2765 0, /* tp_call */
2766 0, /* tp_str */
2767 0, /* tp_getattro */
2768 0, /* tp_setattro */
2769 0, /* tp_as_buffer */
2770 Py_TPFLAGS_DEFAULT, /* tp_flags */
2771 match_doc, /* tp_doc */
2772 0, /* tp_traverse */
2773 0, /* tp_clear */
2774 0, /* tp_richcompare */
2775 0, /* tp_weaklistoffset */
2776 0, /* tp_iter */
2777 0, /* tp_iternext */
2778 match_methods, /* tp_methods */
2779 match_members, /* tp_members */
2780 match_getset, /* tp_getset */
2781};
2782
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002783static PyMethodDef scanner_methods[] = {
2784 _SRE_SRE_SCANNER_MATCH_METHODDEF
2785 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2786 {NULL, NULL}
2787};
2788
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002789#define SCAN_OFF(x) offsetof(ScannerObject, x)
2790static PyMemberDef scanner_members[] = {
2791 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2792 {NULL} /* Sentinel */
2793};
2794
2795static PyTypeObject Scanner_Type = {
2796 PyVarObject_HEAD_INIT(NULL, 0)
2797 "_" SRE_MODULE ".SRE_Scanner",
2798 sizeof(ScannerObject), 0,
2799 (destructor)scanner_dealloc,/* tp_dealloc */
2800 0, /* tp_print */
2801 0, /* tp_getattr */
2802 0, /* tp_setattr */
2803 0, /* tp_reserved */
2804 0, /* tp_repr */
2805 0, /* tp_as_number */
2806 0, /* tp_as_sequence */
2807 0, /* tp_as_mapping */
2808 0, /* tp_hash */
2809 0, /* tp_call */
2810 0, /* tp_str */
2811 0, /* tp_getattro */
2812 0, /* tp_setattro */
2813 0, /* tp_as_buffer */
2814 Py_TPFLAGS_DEFAULT, /* tp_flags */
2815 0, /* tp_doc */
2816 0, /* tp_traverse */
2817 0, /* tp_clear */
2818 0, /* tp_richcompare */
2819 0, /* tp_weaklistoffset */
2820 0, /* tp_iter */
2821 0, /* tp_iternext */
2822 scanner_methods, /* tp_methods */
2823 scanner_members, /* tp_members */
2824 0, /* tp_getset */
2825};
2826
Guido van Rossumb700df92000-03-31 14:59:30 +00002827static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002828 _SRE_COMPILE_METHODDEF
2829 _SRE_GETCODESIZE_METHODDEF
2830 _SRE_GETLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002831 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002832};
2833
Martin v. Löwis1a214512008-06-11 05:26:20 +00002834static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002835 PyModuleDef_HEAD_INIT,
2836 "_" SRE_MODULE,
2837 NULL,
2838 -1,
2839 _functions,
2840 NULL,
2841 NULL,
2842 NULL,
2843 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002844};
2845
2846PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002847{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002848 PyObject* m;
2849 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002850 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002851
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002852 /* Patch object types */
2853 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2854 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002855 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002856
Martin v. Löwis1a214512008-06-11 05:26:20 +00002857 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002858 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002859 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002860 d = PyModule_GetDict(m);
2861
Christian Heimes217cfd12007-12-02 14:31:20 +00002862 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002863 if (x) {
2864 PyDict_SetItemString(d, "MAGIC", x);
2865 Py_DECREF(x);
2866 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002867
Christian Heimes217cfd12007-12-02 14:31:20 +00002868 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002869 if (x) {
2870 PyDict_SetItemString(d, "CODESIZE", x);
2871 Py_DECREF(x);
2872 }
2873
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002874 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2875 if (x) {
2876 PyDict_SetItemString(d, "MAXREPEAT", x);
2877 Py_DECREF(x);
2878 }
2879
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002880 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2881 if (x) {
2882 PyDict_SetItemString(d, "MAXGROUPS", x);
2883 Py_DECREF(x);
2884 }
2885
Neal Norwitzfe537132007-08-26 03:55:15 +00002886 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002887 if (x) {
2888 PyDict_SetItemString(d, "copyright", x);
2889 Py_DECREF(x);
2890 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002891 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002892}
2893
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002894/* vim:ts=4:sw=4:et
2895*/