blob: 957ccbc7fff56e2463ccab450a8de96c16a5eb71 [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02007 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
Guido van Rossumb700df92000-03-31 14:59:30 +000026 *
Fredrik Lundh770617b2001-01-14 15:06:11 +000027 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
Guido van Rossumb700df92000-03-31 14:59:30 +000028 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000029 * This version of the SRE library can be redistributed under CNRI's
30 * Python 1.6 license. For any other use, please contact Secret Labs
31 * AB (info@pythonware.com).
32 *
Guido van Rossumb700df92000-03-31 14:59:30 +000033 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000034 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000035 * other compatibility work.
36 */
37
/* version/copyright banner for this engine */
static char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
/* name of this module, minus the leading underscore; the default is
   used only when the build has not already defined SRE_MODULE */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

/* presumably the name of the Python-level front-end module -- confirm
   against the users of this macro */
#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
65/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000066#define USE_FAST_SEARCH
67
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000068/* enables copy/deepcopy handling (work in progress) */
69#undef USE_BUILTIN_COPY
70
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000071/* -------------------------------------------------------------------- */
72
/* LOCAL(type) introduces a file-private function returning `type` */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#ifdef USE_INLINE
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
#endif
83
/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL           (-1)   /* illegal opcode */
#define SRE_ERROR_STATE             (-2)   /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT   (-3)   /* runaway recursion */
#define SRE_ERROR_MEMORY            (-9)   /* out of memory */
#define SRE_ERROR_INTERRUPTED       (-10)  /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000090
/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and
   expands to nothing otherwise (arguments are then not evaluated) */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
96
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000097/* -------------------------------------------------------------------- */
98/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000099
/* ASCII-only character predicates: code points >= 128 never match
   (except that SRE_IS_LINEBREAK simply compares against '\n') */
#define SRE_IS_DIGIT(ch) ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_ALNUM(ch) ((ch) < 128 && Py_ISALNUM(ch))
/* '_' is below 128, so it needs no separate range check */
#define SRE_IS_WORD(ch) (SRE_IS_ALNUM(ch) || (ch) == '_')
Guido van Rossumb700df92000-03-31 14:59:30 +0000110
/* ASCII lowercase mapping: values >= 128 are returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
115
/* ASCII uppercase mapping: values >= 128 are returned unchanged */
static unsigned int sre_upper(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOUPPER(ch);
}
120
/* locale-specific character predicates */
/* The range test is written as a bit mask: for any unsigned c,
 * !(c & ~N) == (c < N+1).  The mask form avoids compiler warnings
 * when c's type can only hold numbers < N+1. */
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) ((ch) == '_' || SRE_LOC_IS_ALNUM((ch)))
126
/* lowercase mapping through the C library's tolower(); values >= 256
   are returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256)
        return (unsigned int)tolower(ch);
    return ch;
}
131
/* uppercase mapping through the C library's toupper(); values >= 256
   are returned unchanged */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch < 256)
        return (unsigned int)toupper(ch);
    return ch;
}
136
/* unicode-specific character predicates: thin aliases for the
   Py_UNICODE_* classification macros */

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
/* a word character is an alphanumeric or the underscore */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000144
/* lowercase mapping delegated to Py_UNICODE_TOLOWER */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
149
/* uppercase mapping delegated to Py_UNICODE_TOUPPER */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
154
Guido van Rossumb700df92000-03-31 14:59:30 +0000155LOCAL(int)
156sre_category(SRE_CODE category, unsigned int ch)
157{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000158 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000160 case SRE_CATEGORY_DIGIT:
161 return SRE_IS_DIGIT(ch);
162 case SRE_CATEGORY_NOT_DIGIT:
163 return !SRE_IS_DIGIT(ch);
164 case SRE_CATEGORY_SPACE:
165 return SRE_IS_SPACE(ch);
166 case SRE_CATEGORY_NOT_SPACE:
167 return !SRE_IS_SPACE(ch);
168 case SRE_CATEGORY_WORD:
169 return SRE_IS_WORD(ch);
170 case SRE_CATEGORY_NOT_WORD:
171 return !SRE_IS_WORD(ch);
172 case SRE_CATEGORY_LINEBREAK:
173 return SRE_IS_LINEBREAK(ch);
174 case SRE_CATEGORY_NOT_LINEBREAK:
175 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000176
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000177 case SRE_CATEGORY_LOC_WORD:
178 return SRE_LOC_IS_WORD(ch);
179 case SRE_CATEGORY_LOC_NOT_WORD:
180 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000182 case SRE_CATEGORY_UNI_DIGIT:
183 return SRE_UNI_IS_DIGIT(ch);
184 case SRE_CATEGORY_UNI_NOT_DIGIT:
185 return !SRE_UNI_IS_DIGIT(ch);
186 case SRE_CATEGORY_UNI_SPACE:
187 return SRE_UNI_IS_SPACE(ch);
188 case SRE_CATEGORY_UNI_NOT_SPACE:
189 return !SRE_UNI_IS_SPACE(ch);
190 case SRE_CATEGORY_UNI_WORD:
191 return SRE_UNI_IS_WORD(ch);
192 case SRE_CATEGORY_UNI_NOT_WORD:
193 return !SRE_UNI_IS_WORD(ch);
194 case SRE_CATEGORY_UNI_LINEBREAK:
195 return SRE_UNI_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
197 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000198 }
199 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000200}
201
202/* helpers */
203
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000204static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000205data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000206{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000207 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000208 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000209 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000210 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000211 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000212}
213
214static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000215data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000217 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000218 minsize = state->data_stack_base+size;
219 cursize = state->data_stack_size;
220 if (cursize < minsize) {
221 void* stack;
222 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300223 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000224 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000226 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000227 return SRE_ERROR_MEMORY;
228 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000229 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000230 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000231 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000232 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000233}
234
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000235/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000236
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300237#define SRE_CHAR Py_UCS1
238#define SIZEOF_SRE_CHAR 1
239#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300240#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000241
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300242/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000243
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300244#define SRE_CHAR Py_UCS2
245#define SIZEOF_SRE_CHAR 2
246#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300247#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000248
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300249/* generate 32-bit unicode version */
250
251#define SRE_CHAR Py_UCS4
252#define SIZEOF_SRE_CHAR 4
253#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300254#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000255
256/* -------------------------------------------------------------------- */
257/* factories and destructors */
258
259/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100260static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300261static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000262
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300263
264/*[clinic input]
265module _sre
266class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
267class _sre.SRE_Match "MatchObject *" "&Match_Type"
268class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
269[clinic start generated code]*/
270/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
271
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700272static PyTypeObject Pattern_Type;
273static PyTypeObject Match_Type;
274static PyTypeObject Scanner_Type;
275
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300276/*[clinic input]
277_sre.getcodesize -> int
278[clinic start generated code]*/
279
280static int
281_sre_getcodesize_impl(PyModuleDef *module)
282/*[clinic end generated code: output=794f1f98ef4883e5 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000283{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300284 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000285}
286
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300287/*[clinic input]
288_sre.getlower -> int
289
290 character: int
291 flags: int
292 /
293
294[clinic start generated code]*/
295
296static int
297_sre_getlower_impl(PyModuleDef *module, int character, int flags)
298/*[clinic end generated code: output=5fc3616ae2a4c306 input=087d2f1c44bbca6f]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000299{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000300 if (flags & SRE_FLAG_LOCALE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300301 return sre_lower_locale(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000302 if (flags & SRE_FLAG_UNICODE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300303 return sre_lower_unicode(character);
304 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000305}
306
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307LOCAL(void)
308state_reset(SRE_STATE* state)
309{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000310 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000311 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000312
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000313 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000314 state->lastindex = -1;
315
316 state->repeat = NULL;
317
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000318 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000319}
320
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000321static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300323 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600324 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000325{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000326 /* given a python object, return a data pointer, a length (in
327 characters), and a character size. return NULL if the object
328 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000329
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000330 /* Unicode objects do not support the buffer API. So, get the data
331 directly instead. */
332 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333 if (PyUnicode_READY(string) == -1)
334 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200336 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300337 *p_isbytes = 0;
338 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000339 }
340
Victor Stinner0058b862011-09-29 03:27:47 +0200341 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300342 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200343 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000345 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000346
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300347 *p_length = view->len;
348 *p_charsize = 1;
349 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000350
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300351 if (view->buf == NULL) {
352 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
353 PyBuffer_Release(view);
354 view->buf = NULL;
355 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000356 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300357 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000358}
359
360LOCAL(PyObject*)
361state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000362 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000363{
364 /* prepare state object */
365
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000366 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300367 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000368 void* ptr;
369
370 memset(state, 0, sizeof(SRE_STATE));
371
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300372 state->mark = PyMem_New(void *, pattern->groups * 2);
373 if (!state->mark) {
374 PyErr_NoMemory();
375 goto err;
376 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000377 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000378 state->lastindex = -1;
379
Benjamin Petersone48944b2012-03-07 14:50:25 -0600380 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300381 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000382 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600383 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000384
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300385 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600386 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200387 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600388 goto err;
389 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300390 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600391 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200392 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600393 goto err;
394 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000395
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000396 /* adjust boundaries */
397 if (start < 0)
398 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000399 else if (start > length)
400 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000401
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000402 if (end < 0)
403 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000404 else if (end > length)
405 end = length;
406
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300407 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000408 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000409
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000410 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000411
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000412 state->start = (void*) ((char*) ptr + start * state->charsize);
413 state->end = (void*) ((char*) ptr + end * state->charsize);
414
415 Py_INCREF(string);
416 state->string = string;
417 state->pos = start;
418 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000419
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200420 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000421 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200422 state->upper = sre_upper_locale;
423 }
424 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000425 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200426 state->upper = sre_upper_unicode;
427 }
428 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000429 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200430 state->upper = sre_upper;
431 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000432
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000433 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600434 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300435 PyMem_Del(state->mark);
436 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600437 if (state->buffer.buf)
438 PyBuffer_Release(&state->buffer);
439 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000440}
441
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000442LOCAL(void)
443state_fini(SRE_STATE* state)
444{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600445 if (state->buffer.buf)
446 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000447 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000448 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300449 PyMem_Del(state->mark);
450 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000451}
452
/* calculate offset from start of string: the byte distance from
   state->beginning to `member`, divided by the character size */
#define STATE_OFFSET(state, member) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
456
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000457LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300458getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300459 PyObject* string, Py_ssize_t start, Py_ssize_t end)
460{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300461 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300462 if (PyBytes_CheckExact(string) &&
463 start == 0 && end == PyBytes_GET_SIZE(string)) {
464 Py_INCREF(string);
465 return string;
466 }
467 return PyBytes_FromStringAndSize(
468 (const char *)ptr + start, end - start);
469 }
470 else {
471 return PyUnicode_Substring(string, start, end);
472 }
473}
474
475LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000476state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000477{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000478 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000479
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000480 index = (index - 1) * 2;
481
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000482 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000483 if (empty)
484 /* want empty string */
485 i = j = 0;
486 else {
487 Py_INCREF(Py_None);
488 return Py_None;
489 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000490 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000491 i = STATE_OFFSET(state, state->mark[index]);
492 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000493 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000494
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300495 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000496}
497
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000498static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100499pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000500{
501 switch (status) {
502 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400503 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000504 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400505 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000506 "maximum recursion limit exceeded"
507 );
508 break;
509 case SRE_ERROR_MEMORY:
510 PyErr_NoMemory();
511 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000512 case SRE_ERROR_INTERRUPTED:
513 /* An exception has already been raised, so let it fly */
514 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000515 default:
516 /* other error codes indicate compiler/engine bugs */
517 PyErr_SetString(
518 PyExc_RuntimeError,
519 "internal error in regular expression engine"
520 );
521 }
522}
523
Guido van Rossumb700df92000-03-31 14:59:30 +0000524static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000525pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000526{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000527 if (self->weakreflist != NULL)
528 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000529 Py_XDECREF(self->pattern);
530 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000531 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000532 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000533}
534
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300535LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300536sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300537{
538 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300539 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300540 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300541 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300542 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300543 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300544}
545
546LOCAL(Py_ssize_t)
547sre_search(SRE_STATE* state, SRE_CODE* pattern)
548{
549 if (state->charsize == 1)
550 return sre_ucs1_search(state, pattern);
551 if (state->charsize == 2)
552 return sre_ucs2_search(state, pattern);
553 assert(state->charsize == 4);
554 return sre_ucs4_search(state, pattern);
555}
556
Larry Hastings16c51912014-01-07 11:53:01 -0800557static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200558fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
559{
560 if (string2 != NULL) {
561 if (string != NULL) {
562 PyErr_Format(PyExc_TypeError,
563 "Argument given by name ('%s') and position (1)",
564 oldname);
565 return NULL;
566 }
567 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
568 "The '%s' keyword parameter name is deprecated. "
569 "Use 'string' instead.", oldname) < 0)
570 return NULL;
571 return string2;
572 }
573 if (string == NULL) {
574 PyErr_SetString(PyExc_TypeError,
575 "Required argument 'string' (pos 1) not found");
576 return NULL;
577 }
578 return string;
579}
Larry Hastings16c51912014-01-07 11:53:01 -0800580
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300581/*[clinic input]
582_sre.SRE_Pattern.match
583
584 string: object = NULL
585 pos: Py_ssize_t = 0
586 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
587 *
588 pattern: object = NULL
589
590Matches zero or more characters at the beginning of the string.
591[clinic start generated code]*/
592
Larry Hastings16c51912014-01-07 11:53:01 -0800593static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300594_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
595 Py_ssize_t pos, Py_ssize_t endpos,
596 PyObject *pattern)
597/*[clinic end generated code: output=74b4b1da3bb2d84e input=3d079aa99979b81d]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800598{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000599 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100600 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300601 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000602
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200603 string = fix_string_param(string, pattern, "pattern");
604 if (!string)
605 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300606 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000607 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000608
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000609 state.ptr = state.start;
610
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000611 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
612
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300613 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000614
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000615 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300616 if (PyErr_Occurred()) {
617 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000618 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300619 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000620
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300621 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000622 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300623 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000624}
625
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300626/*[clinic input]
627_sre.SRE_Pattern.fullmatch
628
629 string: object = NULL
630 pos: Py_ssize_t = 0
631 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
632 *
633 pattern: object = NULL
634
635Matches against all of the string
636[clinic start generated code]*/
637
638static PyObject *
639_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
640 Py_ssize_t pos, Py_ssize_t endpos,
641 PyObject *pattern)
642/*[clinic end generated code: output=1c98bc5da744ea94 input=d4228606cc12580f]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200643{
644 SRE_STATE state;
645 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300646 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200647
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300648 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200649 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200650 return NULL;
651
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300652 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200653 return NULL;
654
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200655 state.ptr = state.start;
656
657 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
658
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300659 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200660
661 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300662 if (PyErr_Occurred()) {
663 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200664 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300665 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200666
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300667 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200668 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300669 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200670}
671
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300672/*[clinic input]
673_sre.SRE_Pattern.search
674
675 string: object = NULL
676 pos: Py_ssize_t = 0
677 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
678 *
679 pattern: object = NULL
680
681Scan through string looking for a match, and return a corresponding match object instance.
682
683Return None if no position in the string matches.
684[clinic start generated code]*/
685
686static PyObject *
687_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
688 Py_ssize_t pos, Py_ssize_t endpos,
689 PyObject *pattern)
690/*[clinic end generated code: output=3839394a18e5ea4f input=dab42720f4be3a4b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000691{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000692 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100693 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300694 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000695
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300696 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200697 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000698 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000699
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300700 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000701 return NULL;
702
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000703 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
704
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300705 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000706
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000707 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
708
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300709 if (PyErr_Occurred()) {
710 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000711 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300712 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000713
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300714 match = pattern_new_match(self, &state, status);
715 state_fini(&state);
716 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000717}
718
719static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000720call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721{
722 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000723 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000724 PyObject* func;
725 PyObject* result;
726
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000727 if (!args)
728 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000729 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000730 if (!name)
731 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000732 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000733 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000734 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000735 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000736 func = PyObject_GetAttrString(mod, function);
737 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000738 if (!func)
739 return NULL;
740 result = PyObject_CallObject(func, args);
741 Py_DECREF(func);
742 Py_DECREF(args);
743 return result;
744}
745
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).

   Returns 1 on success.  Returns 0 with an exception set on failure, in
   which case *object is left unchanged. */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* call() accepts a NULL tuple and releases a non-NULL one itself. */
    PyObject* argtuple = PyTuple_Pack(2, *object, memo);
    PyObject* replacement = call("copy", "deepcopy", argtuple);

    if (replacement == NULL)
        return 0;

    Py_DECREF(*object);
    *object = replacement;

    return 1; /* success */
}
#endif
765
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300766/*[clinic input]
767_sre.SRE_Pattern.findall
768
769 string: object = NULL
770 pos: Py_ssize_t = 0
771 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
772 *
773 source: object = NULL
774
775Return a list of all non-overlapping matches of pattern in string.
776[clinic start generated code]*/
777
778static PyObject *
779_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
780 Py_ssize_t pos, Py_ssize_t endpos,
781 PyObject *source)
782/*[clinic end generated code: output=51295498b300639d input=df688355c056b9de]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000783{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000784 SRE_STATE state;
785 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100786 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000787 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000788
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300789 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200790 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000791 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000792
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300793 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000794 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000795
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000796 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000797 if (!list) {
798 state_fini(&state);
799 return NULL;
800 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000801
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000802 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000803
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000804 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000805
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000806 state_reset(&state);
807
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000808 state.ptr = state.start;
809
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300810 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300811 if (PyErr_Occurred())
812 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000813
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000814 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000815 if (status == 0)
816 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000817 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000818 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000819 }
Tim Peters3d563502006-01-21 02:47:53 +0000820
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000821 /* don't bother to build a match object */
822 switch (self->groups) {
823 case 0:
824 b = STATE_OFFSET(&state, state.start);
825 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300826 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300827 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000828 if (!item)
829 goto error;
830 break;
831 case 1:
832 item = state_getslice(&state, 1, string, 1);
833 if (!item)
834 goto error;
835 break;
836 default:
837 item = PyTuple_New(self->groups);
838 if (!item)
839 goto error;
840 for (i = 0; i < self->groups; i++) {
841 PyObject* o = state_getslice(&state, i+1, string, 1);
842 if (!o) {
843 Py_DECREF(item);
844 goto error;
845 }
846 PyTuple_SET_ITEM(item, i, o);
847 }
848 break;
849 }
850
851 status = PyList_Append(list, item);
852 Py_DECREF(item);
853 if (status < 0)
854 goto error;
855
856 if (state.ptr == state.start)
857 state.start = (void*) ((char*) state.ptr + state.charsize);
858 else
859 state.start = state.ptr;
860
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000861 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000862
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000863 state_fini(&state);
864 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000865
866error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000867 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000868 state_fini(&state);
869 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000870
Guido van Rossumb700df92000-03-31 14:59:30 +0000871}
872
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300873/*[clinic input]
874_sre.SRE_Pattern.finditer
875
876 string: object
877 pos: Py_ssize_t = 0
878 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
879
880Return an iterator over all non-overlapping matches for the RE pattern in string.
881
882For each match, the iterator returns a match object.
883[clinic start generated code]*/
884
885static PyObject *
886_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
887 Py_ssize_t pos, Py_ssize_t endpos)
888/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000889{
890 PyObject* scanner;
891 PyObject* search;
892 PyObject* iterator;
893
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300894 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000895 if (!scanner)
896 return NULL;
897
898 search = PyObject_GetAttrString(scanner, "search");
899 Py_DECREF(scanner);
900 if (!search)
901 return NULL;
902
903 iterator = PyCallIter_New(search, Py_None);
904 Py_DECREF(search);
905
906 return iterator;
907}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000908
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300909/*[clinic input]
910_sre.SRE_Pattern.scanner
911
912 string: object
913 pos: Py_ssize_t = 0
914 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
915
916[clinic start generated code]*/
917
918static PyObject *
919_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
920 Py_ssize_t pos, Py_ssize_t endpos)
921/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
922{
923 return pattern_scanner(self, string, pos, endpos);
924}
925
926/*[clinic input]
927_sre.SRE_Pattern.split
928
929 string: object = NULL
930 maxsplit: Py_ssize_t = 0
931 *
932 source: object = NULL
933
934Split string by the occurrences of pattern.
935[clinic start generated code]*/
936
937static PyObject *
938_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
939 Py_ssize_t maxsplit, PyObject *source)
940/*[clinic end generated code: output=20bac2ff55b9f84c input=41e0b2e35e599d7b]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000941{
942 SRE_STATE state;
943 PyObject* list;
944 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100945 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000946 Py_ssize_t n;
947 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000948 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000949
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300950 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200951 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000952 return NULL;
953
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200954 assert(self->codesize != 0);
955 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
956 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
957 PyErr_SetString(PyExc_ValueError,
958 "split() requires a non-empty pattern match.");
959 return NULL;
960 }
961 if (PyErr_WarnEx(PyExc_FutureWarning,
962 "split() requires a non-empty pattern match.",
963 1) < 0)
964 return NULL;
965 }
966
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300967 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000968 return NULL;
969
970 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000971 if (!list) {
972 state_fini(&state);
973 return NULL;
974 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000975
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000976 n = 0;
977 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000978
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000979 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000980
981 state_reset(&state);
982
983 state.ptr = state.start;
984
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300985 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300986 if (PyErr_Occurred())
987 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000988
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000989 if (status <= 0) {
990 if (status == 0)
991 break;
992 pattern_error(status);
993 goto error;
994 }
Tim Peters3d563502006-01-21 02:47:53 +0000995
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000996 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300997 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000998 break;
999 /* skip one character */
1000 state.start = (void*) ((char*) state.ptr + state.charsize);
1001 continue;
1002 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001003
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001004 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001005 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001006 string, STATE_OFFSET(&state, last),
1007 STATE_OFFSET(&state, state.start)
1008 );
1009 if (!item)
1010 goto error;
1011 status = PyList_Append(list, item);
1012 Py_DECREF(item);
1013 if (status < 0)
1014 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001015
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001016 /* add groups (if any) */
1017 for (i = 0; i < self->groups; i++) {
1018 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001019 if (!item)
1020 goto error;
1021 status = PyList_Append(list, item);
1022 Py_DECREF(item);
1023 if (status < 0)
1024 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001025 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001026
1027 n = n + 1;
1028
1029 last = state.start = state.ptr;
1030
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001031 }
1032
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001033 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001034 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001035 string, STATE_OFFSET(&state, last), state.endpos
1036 );
1037 if (!item)
1038 goto error;
1039 status = PyList_Append(list, item);
1040 Py_DECREF(item);
1041 if (status < 0)
1042 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001043
1044 state_fini(&state);
1045 return list;
1046
1047error:
1048 Py_DECREF(list);
1049 state_fini(&state);
1050 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001051
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001052}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001053
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001054static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001055pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001056 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001057{
1058 SRE_STATE state;
1059 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001060 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001061 PyObject* item;
1062 PyObject* filter;
1063 PyObject* args;
1064 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001065 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001066 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001067 Py_ssize_t n;
1068 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001069 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001070 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001071 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001072
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001073 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001074 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001075 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001076 Py_INCREF(filter);
1077 filter_is_callable = 1;
1078 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001079 /* if not callable, check if it's a literal string */
1080 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001081 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001082 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001083 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001084 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001085 if (charsize == 1)
1086 literal = memchr(ptr, '\\', n) == NULL;
1087 else
1088 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001089 } else {
1090 PyErr_Clear();
1091 literal = 0;
1092 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001093 if (view.buf)
1094 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001095 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001096 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001097 Py_INCREF(filter);
1098 filter_is_callable = 0;
1099 } else {
1100 /* not a literal; hand it over to the template compiler */
1101 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001102 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001103 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001104 );
1105 if (!filter)
1106 return NULL;
1107 filter_is_callable = PyCallable_Check(filter);
1108 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001109 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001110
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001111 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001112 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001113 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001114 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001115
1116 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001117 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001118 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001119 state_fini(&state);
1120 return NULL;
1121 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001122
1123 n = i = 0;
1124
1125 while (!count || n < count) {
1126
1127 state_reset(&state);
1128
1129 state.ptr = state.start;
1130
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001131 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001132 if (PyErr_Occurred())
1133 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001134
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001135 if (status <= 0) {
1136 if (status == 0)
1137 break;
1138 pattern_error(status);
1139 goto error;
1140 }
Tim Peters3d563502006-01-21 02:47:53 +00001141
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001142 b = STATE_OFFSET(&state, state.start);
1143 e = STATE_OFFSET(&state, state.ptr);
1144
1145 if (i < b) {
1146 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001147 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001148 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001149 if (!item)
1150 goto error;
1151 status = PyList_Append(list, item);
1152 Py_DECREF(item);
1153 if (status < 0)
1154 goto error;
1155
1156 } else if (i == b && i == e && n > 0)
1157 /* ignore empty match on latest position */
1158 goto next;
1159
1160 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001161 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001162 match = pattern_new_match(self, &state, 1);
1163 if (!match)
1164 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001165 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001166 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001167 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001168 goto error;
1169 }
1170 item = PyObject_CallObject(filter, args);
1171 Py_DECREF(args);
1172 Py_DECREF(match);
1173 if (!item)
1174 goto error;
1175 } else {
1176 /* filter is literal string */
1177 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001178 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001179 }
1180
1181 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001182 if (item != Py_None) {
1183 status = PyList_Append(list, item);
1184 Py_DECREF(item);
1185 if (status < 0)
1186 goto error;
1187 }
Tim Peters3d563502006-01-21 02:47:53 +00001188
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001189 i = e;
1190 n = n + 1;
1191
1192next:
1193 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001194 if (state.ptr == state.end)
1195 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001196 if (state.ptr == state.start)
1197 state.start = (void*) ((char*) state.ptr + state.charsize);
1198 else
1199 state.start = state.ptr;
1200
1201 }
1202
1203 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001204 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001205 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001206 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001207 if (!item)
1208 goto error;
1209 status = PyList_Append(list, item);
1210 Py_DECREF(item);
1211 if (status < 0)
1212 goto error;
1213 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001214
1215 state_fini(&state);
1216
Guido van Rossum4e173842001-12-07 04:25:10 +00001217 Py_DECREF(filter);
1218
Fredrik Lundhdac58492001-10-21 21:48:30 +00001219 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001220 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001221 if (!joiner) {
1222 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001223 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001224 }
1225 if (PyList_GET_SIZE(list) == 0) {
1226 Py_DECREF(list);
1227 item = joiner;
1228 }
1229 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001230 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001231 item = _PyBytes_Join(joiner, list);
1232 else
1233 item = PyUnicode_Join(joiner, list);
1234 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001235 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001236 if (!item)
1237 return NULL;
1238 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001239
1240 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001241 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001242
1243 return item;
1244
1245error:
1246 Py_DECREF(list);
1247 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001248 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001249 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001250
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001251}
1252
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001253/*[clinic input]
1254_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001255
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001256 repl: object
1257 string: object
1258 count: Py_ssize_t = 0
1259
1260Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1261[clinic start generated code]*/
1262
1263static PyObject *
1264_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1265 PyObject *string, Py_ssize_t count)
1266/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1267{
1268 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001269}
1270
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001271/*[clinic input]
1272_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001273
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001274 repl: object
1275 string: object
1276 count: Py_ssize_t = 0
1277
1278Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1279[clinic start generated code]*/
1280
1281static PyObject *
1282_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1283 PyObject *string, Py_ssize_t count)
1284/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1285{
1286 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001287}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001288
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001289/*[clinic input]
1290_sre.SRE_Pattern.__copy__
1291
1292[clinic start generated code]*/
1293
1294static PyObject *
1295_sre_SRE_Pattern___copy___impl(PatternObject *self)
1296/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001297{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001298#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001299 PatternObject* copy;
1300 int offset;
1301
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001302 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1303 if (!copy)
1304 return NULL;
1305
1306 offset = offsetof(PatternObject, groups);
1307
1308 Py_XINCREF(self->groupindex);
1309 Py_XINCREF(self->indexgroup);
1310 Py_XINCREF(self->pattern);
1311
1312 memcpy((char*) copy + offset, (char*) self + offset,
1313 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001314 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001315
1316 return (PyObject*) copy;
1317#else
1318 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1319 return NULL;
1320#endif
1321}
1322
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001323/*[clinic input]
1324_sre.SRE_Pattern.__deepcopy__
1325
1326 memo: object
1327
1328[clinic start generated code]*/
1329
1330static PyObject *
1331_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1332/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001333{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001334#ifdef USE_BUILTIN_COPY
1335 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001336
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001337 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001338 if (!copy)
1339 return NULL;
1340
1341 if (!deepcopy(&copy->groupindex, memo) ||
1342 !deepcopy(&copy->indexgroup, memo) ||
1343 !deepcopy(&copy->pattern, memo)) {
1344 Py_DECREF(copy);
1345 return NULL;
1346 }
1347
1348#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001349 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1350 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001351#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001352}
1353
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001354static PyObject *
1355pattern_repr(PatternObject *obj)
1356{
1357 static const struct {
1358 const char *name;
1359 int value;
1360 } flag_names[] = {
1361 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1362 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1363 {"re.LOCALE", SRE_FLAG_LOCALE},
1364 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1365 {"re.DOTALL", SRE_FLAG_DOTALL},
1366 {"re.UNICODE", SRE_FLAG_UNICODE},
1367 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1368 {"re.DEBUG", SRE_FLAG_DEBUG},
1369 {"re.ASCII", SRE_FLAG_ASCII},
1370 };
1371 PyObject *result = NULL;
1372 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001373 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001374 int flags = obj->flags;
1375
1376 /* Omit re.UNICODE for valid string patterns. */
1377 if (obj->isbytes == 0 &&
1378 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1379 SRE_FLAG_UNICODE)
1380 flags &= ~SRE_FLAG_UNICODE;
1381
1382 flag_items = PyList_New(0);
1383 if (!flag_items)
1384 return NULL;
1385
1386 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1387 if (flags & flag_names[i].value) {
1388 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1389 if (!item)
1390 goto done;
1391
1392 if (PyList_Append(flag_items, item) < 0) {
1393 Py_DECREF(item);
1394 goto done;
1395 }
1396 Py_DECREF(item);
1397 flags &= ~flag_names[i].value;
1398 }
1399 }
1400 if (flags) {
1401 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1402 if (!item)
1403 goto done;
1404
1405 if (PyList_Append(flag_items, item) < 0) {
1406 Py_DECREF(item);
1407 goto done;
1408 }
1409 Py_DECREF(item);
1410 }
1411
1412 if (PyList_Size(flag_items) > 0) {
1413 PyObject *flags_result;
1414 PyObject *sep = PyUnicode_FromString("|");
1415 if (!sep)
1416 goto done;
1417 flags_result = PyUnicode_Join(sep, flag_items);
1418 Py_DECREF(sep);
1419 if (!flags_result)
1420 goto done;
1421 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1422 obj->pattern, flags_result);
1423 Py_DECREF(flags_result);
1424 }
1425 else {
1426 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1427 }
1428
1429done:
1430 Py_DECREF(flag_items);
1431 return result;
1432}
1433
Raymond Hettinger94478742004-09-24 04:31:19 +00001434PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1435
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001436/* PatternObject's 'groupindex' method. */
1437static PyObject *
1438pattern_groupindex(PatternObject *self)
1439{
1440 return PyDictProxy_New(self->groupindex);
1441}
1442
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001443static int _validate(PatternObject *self); /* Forward */
1444
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001445/*[clinic input]
1446_sre.compile
1447
1448 pattern: object
1449 flags: int
1450 code: object(subclass_of='&PyList_Type')
1451 groups: Py_ssize_t
1452 groupindex: object
1453 indexgroup: object
1454
1455[clinic start generated code]*/
1456
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001457static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001458_sre_compile_impl(PyModuleDef *module, PyObject *pattern, int flags,
1459 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1460 PyObject *indexgroup)
1461/*[clinic end generated code: output=3004b293730bf309 input=7d059ec8ae1edb85]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001462{
1463 /* "compile" pattern descriptor to pattern object */
1464
1465 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001466 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001467
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001468 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001469 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001470 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1471 if (!self)
1472 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001473 self->weakreflist = NULL;
1474 self->pattern = NULL;
1475 self->groupindex = NULL;
1476 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001477
1478 self->codesize = n;
1479
1480 for (i = 0; i < n; i++) {
1481 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001482 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001483 self->code[i] = (SRE_CODE) value;
1484 if ((unsigned long) self->code[i] != value) {
1485 PyErr_SetString(PyExc_OverflowError,
1486 "regular expression code size limit exceeded");
1487 break;
1488 }
1489 }
1490
1491 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001492 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001493 return NULL;
1494 }
1495
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001496 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001497 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001498 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001499 else {
1500 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001501 int charsize;
1502 Py_buffer view;
1503 view.buf = NULL;
1504 if (!getstring(pattern, &p_length, &self->isbytes,
1505 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001506 Py_DECREF(self);
1507 return NULL;
1508 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001509 if (view.buf)
1510 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001511 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001512
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001513 Py_INCREF(pattern);
1514 self->pattern = pattern;
1515
1516 self->flags = flags;
1517
1518 self->groups = groups;
1519
1520 Py_XINCREF(groupindex);
1521 self->groupindex = groupindex;
1522
1523 Py_XINCREF(indexgroup);
1524 self->indexgroup = indexgroup;
1525
1526 self->weakreflist = NULL;
1527
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001528 if (!_validate(self)) {
1529 Py_DECREF(self);
1530 return NULL;
1531 }
1532
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001533 return (PyObject*) self;
1534}
1535
Guido van Rossumb700df92000-03-31 14:59:30 +00001536/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001537/* Code validation */
1538
1539/* To learn more about this code, have a look at the _compile() function in
1540 Lib/sre_compile.py. The validation functions below check the code array
1541 for conformance with the code patterns generated there.
1542
1543 The nice thing about the generated code is that it is position-independent:
1544 all jumps are relative jumps forward. Also, jumps don't cross each other:
1545 the target of a later jump is always earlier than the target of an earlier
1546 jump. IOW, this is okay:
1547
1548 J---------J-------T--------T
1549 \ \_____/ /
1550 \______________________/
1551
1552 but this is not:
1553
1554 J---------J-------T--------T
1555 \_________\_____/ /
1556 \____________/
1557
Serhiy Storchakaefa5a392013-10-27 08:04:58 +02001558 It also helps that SRE_CODE is always an unsigned type.
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001559*/
1560
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf argument list: VTRACE(("fmt", args...)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the enclosing function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.
   These macros read and advance the caller's 'code' pointer, compare it
   against the caller's 'end', and store into the caller's 'op', 'arg'
   or 'skip' variable.  They FAIL when 'code' has reached 'end'. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL if (skip - adj) reaches past 'end', measured
   from the position of the skip word itself.  'adj' is parenthesized so
   that an expression argument is subtracted as a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001601
1602static int
1603_validate_charset(SRE_CODE *code, SRE_CODE *end)
1604{
1605 /* Some variables are manipulated by the macros above */
1606 SRE_CODE op;
1607 SRE_CODE arg;
1608 SRE_CODE offset;
1609 int i;
1610
1611 while (code < end) {
1612 GET_OP;
1613 switch (op) {
1614
1615 case SRE_OP_NEGATE:
1616 break;
1617
1618 case SRE_OP_LITERAL:
1619 GET_ARG;
1620 break;
1621
1622 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001623 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001624 GET_ARG;
1625 GET_ARG;
1626 break;
1627
1628 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001629 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001630 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001631 FAIL;
1632 code += offset;
1633 break;
1634
1635 case SRE_OP_BIGCHARSET:
1636 GET_ARG; /* Number of blocks */
1637 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001638 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001639 FAIL;
1640 /* Make sure that each byte points to a valid block */
1641 for (i = 0; i < 256; i++) {
1642 if (((unsigned char *)code)[i] >= arg)
1643 FAIL;
1644 }
1645 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001646 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001647 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001648 FAIL;
1649 code += offset;
1650 break;
1651
1652 case SRE_OP_CATEGORY:
1653 GET_ARG;
1654 switch (arg) {
1655 case SRE_CATEGORY_DIGIT:
1656 case SRE_CATEGORY_NOT_DIGIT:
1657 case SRE_CATEGORY_SPACE:
1658 case SRE_CATEGORY_NOT_SPACE:
1659 case SRE_CATEGORY_WORD:
1660 case SRE_CATEGORY_NOT_WORD:
1661 case SRE_CATEGORY_LINEBREAK:
1662 case SRE_CATEGORY_NOT_LINEBREAK:
1663 case SRE_CATEGORY_LOC_WORD:
1664 case SRE_CATEGORY_LOC_NOT_WORD:
1665 case SRE_CATEGORY_UNI_DIGIT:
1666 case SRE_CATEGORY_UNI_NOT_DIGIT:
1667 case SRE_CATEGORY_UNI_SPACE:
1668 case SRE_CATEGORY_UNI_NOT_SPACE:
1669 case SRE_CATEGORY_UNI_WORD:
1670 case SRE_CATEGORY_UNI_NOT_WORD:
1671 case SRE_CATEGORY_UNI_LINEBREAK:
1672 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1673 break;
1674 default:
1675 FAIL;
1676 }
1677 break;
1678
1679 default:
1680 FAIL;
1681
1682 }
1683 }
1684
1685 return 1;
1686}
1687
1688static int
1689_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1690{
1691 /* Some variables are manipulated by the macros above */
1692 SRE_CODE op;
1693 SRE_CODE arg;
1694 SRE_CODE skip;
1695
1696 VTRACE(("code=%p, end=%p\n", code, end));
1697
1698 if (code > end)
1699 FAIL;
1700
1701 while (code < end) {
1702 GET_OP;
1703 switch (op) {
1704
1705 case SRE_OP_MARK:
1706 /* We don't check whether marks are properly nested; the
1707 sre_match() code is robust even if they don't, and the worst
1708 you can get is nonsensical match results. */
1709 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001710 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001711 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1712 FAIL;
1713 }
1714 break;
1715
1716 case SRE_OP_LITERAL:
1717 case SRE_OP_NOT_LITERAL:
1718 case SRE_OP_LITERAL_IGNORE:
1719 case SRE_OP_NOT_LITERAL_IGNORE:
1720 GET_ARG;
1721 /* The arg is just a character, nothing to check */
1722 break;
1723
1724 case SRE_OP_SUCCESS:
1725 case SRE_OP_FAILURE:
1726 /* Nothing to check; these normally end the matching process */
1727 break;
1728
1729 case SRE_OP_AT:
1730 GET_ARG;
1731 switch (arg) {
1732 case SRE_AT_BEGINNING:
1733 case SRE_AT_BEGINNING_STRING:
1734 case SRE_AT_BEGINNING_LINE:
1735 case SRE_AT_END:
1736 case SRE_AT_END_LINE:
1737 case SRE_AT_END_STRING:
1738 case SRE_AT_BOUNDARY:
1739 case SRE_AT_NON_BOUNDARY:
1740 case SRE_AT_LOC_BOUNDARY:
1741 case SRE_AT_LOC_NON_BOUNDARY:
1742 case SRE_AT_UNI_BOUNDARY:
1743 case SRE_AT_UNI_NON_BOUNDARY:
1744 break;
1745 default:
1746 FAIL;
1747 }
1748 break;
1749
1750 case SRE_OP_ANY:
1751 case SRE_OP_ANY_ALL:
1752 /* These have no operands */
1753 break;
1754
1755 case SRE_OP_IN:
1756 case SRE_OP_IN_IGNORE:
1757 GET_SKIP;
1758 /* Stop 1 before the end; we check the FAILURE below */
1759 if (!_validate_charset(code, code+skip-2))
1760 FAIL;
1761 if (code[skip-2] != SRE_OP_FAILURE)
1762 FAIL;
1763 code += skip-1;
1764 break;
1765
1766 case SRE_OP_INFO:
1767 {
1768 /* A minimal info field is
1769 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1770 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1771 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001772 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001773 SRE_CODE *newcode;
1774 GET_SKIP;
1775 newcode = code+skip-1;
1776 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001777 GET_ARG;
1778 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001779 /* Check that only valid flags are present */
1780 if ((flags & ~(SRE_INFO_PREFIX |
1781 SRE_INFO_LITERAL |
1782 SRE_INFO_CHARSET)) != 0)
1783 FAIL;
1784 /* PREFIX and CHARSET are mutually exclusive */
1785 if ((flags & SRE_INFO_PREFIX) &&
1786 (flags & SRE_INFO_CHARSET))
1787 FAIL;
1788 /* LITERAL implies PREFIX */
1789 if ((flags & SRE_INFO_LITERAL) &&
1790 !(flags & SRE_INFO_PREFIX))
1791 FAIL;
1792 /* Validate the prefix */
1793 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001794 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001795 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001796 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001797 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001798 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001799 FAIL;
1800 code += prefix_len;
1801 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001802 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001803 FAIL;
1804 /* Each overlap value should be < prefix_len */
1805 for (i = 0; i < prefix_len; i++) {
1806 if (code[i] >= prefix_len)
1807 FAIL;
1808 }
1809 code += prefix_len;
1810 }
1811 /* Validate the charset */
1812 if (flags & SRE_INFO_CHARSET) {
1813 if (!_validate_charset(code, newcode-1))
1814 FAIL;
1815 if (newcode[-1] != SRE_OP_FAILURE)
1816 FAIL;
1817 code = newcode;
1818 }
1819 else if (code != newcode) {
1820 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1821 FAIL;
1822 }
1823 }
1824 break;
1825
1826 case SRE_OP_BRANCH:
1827 {
1828 SRE_CODE *target = NULL;
1829 for (;;) {
1830 GET_SKIP;
1831 if (skip == 0)
1832 break;
1833 /* Stop 2 before the end; we check the JUMP below */
1834 if (!_validate_inner(code, code+skip-3, groups))
1835 FAIL;
1836 code += skip-3;
1837 /* Check that it ends with a JUMP, and that each JUMP
1838 has the same target */
1839 GET_OP;
1840 if (op != SRE_OP_JUMP)
1841 FAIL;
1842 GET_SKIP;
1843 if (target == NULL)
1844 target = code+skip-1;
1845 else if (code+skip-1 != target)
1846 FAIL;
1847 }
1848 }
1849 break;
1850
1851 case SRE_OP_REPEAT_ONE:
1852 case SRE_OP_MIN_REPEAT_ONE:
1853 {
1854 SRE_CODE min, max;
1855 GET_SKIP;
1856 GET_ARG; min = arg;
1857 GET_ARG; max = arg;
1858 if (min > max)
1859 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001860 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001861 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001862 if (!_validate_inner(code, code+skip-4, groups))
1863 FAIL;
1864 code += skip-4;
1865 GET_OP;
1866 if (op != SRE_OP_SUCCESS)
1867 FAIL;
1868 }
1869 break;
1870
1871 case SRE_OP_REPEAT:
1872 {
1873 SRE_CODE min, max;
1874 GET_SKIP;
1875 GET_ARG; min = arg;
1876 GET_ARG; max = arg;
1877 if (min > max)
1878 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001879 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001880 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001881 if (!_validate_inner(code, code+skip-3, groups))
1882 FAIL;
1883 code += skip-3;
1884 GET_OP;
1885 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1886 FAIL;
1887 }
1888 break;
1889
1890 case SRE_OP_GROUPREF:
1891 case SRE_OP_GROUPREF_IGNORE:
1892 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001893 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001894 FAIL;
1895 break;
1896
1897 case SRE_OP_GROUPREF_EXISTS:
1898 /* The regex syntax for this is: '(?(group)then|else)', where
1899 'group' is either an integer group number or a group name,
1900 'then' and 'else' are sub-regexes, and 'else' is optional. */
1901 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001902 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001903 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001904 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001905 code--; /* The skip is relative to the first arg! */
1906 /* There are two possibilities here: if there is both a 'then'
1907 part and an 'else' part, the generated code looks like:
1908
1909 GROUPREF_EXISTS
1910 <group>
1911 <skipyes>
1912 ...then part...
1913 JUMP
1914 <skipno>
1915 (<skipyes> jumps here)
1916 ...else part...
1917 (<skipno> jumps here)
1918
1919 If there is only a 'then' part, it looks like:
1920
1921 GROUPREF_EXISTS
1922 <group>
1923 <skip>
1924 ...then part...
1925 (<skip> jumps here)
1926
1927 There is no direct way to decide which it is, and we don't want
1928 to allow arbitrary jumps anywhere in the code; so we just look
1929 for a JUMP opcode preceding our skip target.
1930 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001931 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001932 code[skip-3] == SRE_OP_JUMP)
1933 {
1934 VTRACE(("both then and else parts present\n"));
1935 if (!_validate_inner(code+1, code+skip-3, groups))
1936 FAIL;
1937 code += skip-2; /* Position after JUMP, at <skipno> */
1938 GET_SKIP;
1939 if (!_validate_inner(code, code+skip-1, groups))
1940 FAIL;
1941 code += skip-1;
1942 }
1943 else {
1944 VTRACE(("only a then part present\n"));
1945 if (!_validate_inner(code+1, code+skip-1, groups))
1946 FAIL;
1947 code += skip-1;
1948 }
1949 break;
1950
1951 case SRE_OP_ASSERT:
1952 case SRE_OP_ASSERT_NOT:
1953 GET_SKIP;
1954 GET_ARG; /* 0 for lookahead, width for lookbehind */
1955 code--; /* Back up over arg to simplify math below */
1956 if (arg & 0x80000000)
1957 FAIL; /* Width too large */
1958 /* Stop 1 before the end; we check the SUCCESS below */
1959 if (!_validate_inner(code+1, code+skip-2, groups))
1960 FAIL;
1961 code += skip-2;
1962 GET_OP;
1963 if (op != SRE_OP_SUCCESS)
1964 FAIL;
1965 break;
1966
1967 default:
1968 FAIL;
1969
1970 }
1971 }
1972
1973 VTRACE(("okay\n"));
1974 return 1;
1975}
1976
1977static int
1978_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1979{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001980 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1981 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001982 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001983 return _validate_inner(code, end-1, groups);
1984}
1985
1986static int
1987_validate(PatternObject *self)
1988{
1989 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1990 {
1991 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1992 return 0;
1993 }
1994 else
1995 VTRACE(("Success!\n"));
1996 return 1;
1997}
1998
1999/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00002000/* match methods */
2001
2002static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002003match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002004{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002005 Py_XDECREF(self->regs);
2006 Py_XDECREF(self->string);
2007 Py_DECREF(self->pattern);
2008 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002009}
2010
2011static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002012match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002013{
Serhiy Storchaka25324972013-10-16 12:46:28 +03002014 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002015 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03002016 Py_buffer view;
2017 PyObject *result;
2018 void* ptr;
2019
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002020 if (index < 0 || index >= self->groups) {
2021 /* raise IndexError if we were given a bad group number */
2022 PyErr_SetString(
2023 PyExc_IndexError,
2024 "no such group"
2025 );
2026 return NULL;
2027 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002028
Fredrik Lundh6f013982000-07-03 18:44:21 +00002029 index *= 2;
2030
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002031 if (self->string == Py_None || self->mark[index] < 0) {
2032 /* return default value if the string or group is undefined */
2033 Py_INCREF(def);
2034 return def;
2035 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002036
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002037 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03002038 if (ptr == NULL)
2039 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002040 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03002041 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002042 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002043 PyBuffer_Release(&view);
2044 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002045}
2046
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002047static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002048match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002049{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002050 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002051
Guido van Rossumddefaf32007-01-14 03:31:43 +00002052 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002053 /* Default value */
2054 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002055
Christian Heimes217cfd12007-12-02 14:31:20 +00002056 if (PyLong_Check(index))
2057 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002058
Fredrik Lundh6f013982000-07-03 18:44:21 +00002059 i = -1;
2060
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002061 if (self->pattern->groupindex) {
2062 index = PyObject_GetItem(self->pattern->groupindex, index);
2063 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002064 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002065 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002066 Py_DECREF(index);
2067 } else
2068 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002069 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002070
2071 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002072}
2073
2074static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002075match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002076{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002077 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002078}
2079
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002080/*[clinic input]
2081_sre.SRE_Match.expand
2082
2083 template: object
2084
2085Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2086[clinic start generated code]*/
2087
2088static PyObject *
2089_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2090/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002091{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002092 /* delegate to Python code */
2093 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002094 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002095 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002096 );
2097}
2098
2099static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002100match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002101{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002102 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002103 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002104
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002105 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002106
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002107 switch (size) {
2108 case 0:
2109 result = match_getslice(self, Py_False, Py_None);
2110 break;
2111 case 1:
2112 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2113 break;
2114 default:
2115 /* fetch multiple items */
2116 result = PyTuple_New(size);
2117 if (!result)
2118 return NULL;
2119 for (i = 0; i < size; i++) {
2120 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002121 self, PyTuple_GET_ITEM(args, i), Py_None
2122 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002123 if (!item) {
2124 Py_DECREF(result);
2125 return NULL;
2126 }
2127 PyTuple_SET_ITEM(result, i, item);
2128 }
2129 break;
2130 }
2131 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002132}
2133
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002134/*[clinic input]
2135_sre.SRE_Match.groups
2136
2137 default: object = None
2138 Is used for groups that did not participate in the match.
2139
2140Return a tuple containing all the subgroups of the match, from 1.
2141[clinic start generated code]*/
2142
2143static PyObject *
2144_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2145/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002146{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002147 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002148 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002149
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002150 result = PyTuple_New(self->groups-1);
2151 if (!result)
2152 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002153
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002154 for (index = 1; index < self->groups; index++) {
2155 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002156 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002157 if (!item) {
2158 Py_DECREF(result);
2159 return NULL;
2160 }
2161 PyTuple_SET_ITEM(result, index-1, item);
2162 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002163
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002164 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002165}
2166
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002167/*[clinic input]
2168_sre.SRE_Match.groupdict
2169
2170 default: object = None
2171 Is used for groups that did not participate in the match.
2172
2173Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2174[clinic start generated code]*/
2175
2176static PyObject *
2177_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2178/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002179{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002180 PyObject* result;
2181 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002182 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002183
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002184 result = PyDict_New();
2185 if (!result || !self->pattern->groupindex)
2186 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002187
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002188 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002189 if (!keys)
2190 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002191
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002192 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002193 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002194 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002195 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002196 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002197 if (!key)
2198 goto failed;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002199 value = match_getslice(self, key, default_value);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002200 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002201 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002202 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002203 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002204 status = PyDict_SetItem(result, key, value);
2205 Py_DECREF(value);
2206 if (status < 0)
2207 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002208 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002209
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002210 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002211
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002212 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002213
2214failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002215 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002216 Py_DECREF(result);
2217 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002218}
2219
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002220/*[clinic input]
2221_sre.SRE_Match.start -> Py_ssize_t
2222
2223 group: object(c_default="NULL") = 0
2224 /
2225
2226Return index of the start of the substring matched by group.
2227[clinic start generated code]*/
2228
2229static Py_ssize_t
2230_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2231/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002232{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002233 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002234
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002235 if (index < 0 || index >= self->groups) {
2236 PyErr_SetString(
2237 PyExc_IndexError,
2238 "no such group"
2239 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002240 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002241 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002242
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002243 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002244 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002245}
2246
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002247/*[clinic input]
2248_sre.SRE_Match.end -> Py_ssize_t
2249
2250 group: object(c_default="NULL") = 0
2251 /
2252
2253Return index of the end of the substring matched by group.
2254[clinic start generated code]*/
2255
2256static Py_ssize_t
2257_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2258/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002259{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002260 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002261
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002262 if (index < 0 || index >= self->groups) {
2263 PyErr_SetString(
2264 PyExc_IndexError,
2265 "no such group"
2266 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002267 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002268 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002269
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002270 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002271 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002272}
2273
2274LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002275_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002276{
2277 PyObject* pair;
2278 PyObject* item;
2279
2280 pair = PyTuple_New(2);
2281 if (!pair)
2282 return NULL;
2283
Christian Heimes217cfd12007-12-02 14:31:20 +00002284 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002285 if (!item)
2286 goto error;
2287 PyTuple_SET_ITEM(pair, 0, item);
2288
Christian Heimes217cfd12007-12-02 14:31:20 +00002289 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002290 if (!item)
2291 goto error;
2292 PyTuple_SET_ITEM(pair, 1, item);
2293
2294 return pair;
2295
2296 error:
2297 Py_DECREF(pair);
2298 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002299}
2300
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002301/*[clinic input]
2302_sre.SRE_Match.span
2303
2304 group: object(c_default="NULL") = 0
2305 /
2306
2307For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2308[clinic start generated code]*/
2309
2310static PyObject *
2311_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2312/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002313{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002314 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002315
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002316 if (index < 0 || index >= self->groups) {
2317 PyErr_SetString(
2318 PyExc_IndexError,
2319 "no such group"
2320 );
2321 return NULL;
2322 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002323
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002324 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002325 return _pair(self->mark[index*2], self->mark[index*2+1]);
2326}
2327
2328static PyObject*
2329match_regs(MatchObject* self)
2330{
2331 PyObject* regs;
2332 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002333 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002334
2335 regs = PyTuple_New(self->groups);
2336 if (!regs)
2337 return NULL;
2338
2339 for (index = 0; index < self->groups; index++) {
2340 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2341 if (!item) {
2342 Py_DECREF(regs);
2343 return NULL;
2344 }
2345 PyTuple_SET_ITEM(regs, index, item);
2346 }
2347
2348 Py_INCREF(regs);
2349 self->regs = regs;
2350
2351 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002352}
2353
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002354/*[clinic input]
2355_sre.SRE_Match.__copy__
2356
2357[clinic start generated code]*/
2358
2359static PyObject *
2360_sre_SRE_Match___copy___impl(MatchObject *self)
2361/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002362{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002363#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002364 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002365 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002366
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002367 slots = 2 * (self->pattern->groups+1);
2368
2369 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2370 if (!copy)
2371 return NULL;
2372
2373 /* this value a constant, but any compiler should be able to
2374 figure that out all by itself */
2375 offset = offsetof(MatchObject, string);
2376
2377 Py_XINCREF(self->pattern);
2378 Py_XINCREF(self->string);
2379 Py_XINCREF(self->regs);
2380
2381 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002382 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002383
2384 return (PyObject*) copy;
2385#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002386 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002387 return NULL;
2388#endif
2389}
2390
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002391/*[clinic input]
2392_sre.SRE_Match.__deepcopy__
2393
2394 memo: object
2395
2396[clinic start generated code]*/
2397
2398static PyObject *
2399_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2400/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002401{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002402#ifdef USE_BUILTIN_COPY
2403 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002404
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002405 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002406 if (!copy)
2407 return NULL;
2408
2409 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2410 !deepcopy(&copy->string, memo) ||
2411 !deepcopy(&copy->regs, memo)) {
2412 Py_DECREF(copy);
2413 return NULL;
2414 }
2415
2416#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002417 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2418 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002419#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002420}
2421
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002422PyDoc_STRVAR(match_doc,
2423"The result of re.match() and re.search().\n\
2424Match objects always have a boolean value of True.");
2425
2426PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002427"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002428 Return subgroup(s) of the match by indices or names.\n\
2429 For 0 returns the entire match.");
2430
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002431static PyObject *
2432match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002433{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002434 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002435 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002436 Py_INCREF(Py_None);
2437 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002438}
2439
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002440static PyObject *
2441match_lastgroup_get(MatchObject *self)
2442{
2443 if (self->pattern->indexgroup && self->lastindex >= 0) {
2444 PyObject* result = PySequence_GetItem(
2445 self->pattern->indexgroup, self->lastindex
2446 );
2447 if (result)
2448 return result;
2449 PyErr_Clear();
2450 }
2451 Py_INCREF(Py_None);
2452 return Py_None;
2453}
2454
2455static PyObject *
2456match_regs_get(MatchObject *self)
2457{
2458 if (self->regs) {
2459 Py_INCREF(self->regs);
2460 return self->regs;
2461 } else
2462 return match_regs(self);
2463}
2464
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002465static PyObject *
2466match_repr(MatchObject *self)
2467{
2468 PyObject *result;
2469 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2470 if (group0 == NULL)
2471 return NULL;
2472 result = PyUnicode_FromFormat(
2473 "<%s object; span=(%d, %d), match=%.50R>",
2474 Py_TYPE(self)->tp_name,
2475 self->mark[0], self->mark[1], group0);
2476 Py_DECREF(group0);
2477 return result;
2478}
2479
2480
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002481static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002482pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002483{
2484 /* create match object (from state object) */
2485
2486 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002487 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002488 char* base;
2489 int n;
2490
2491 if (status > 0) {
2492
2493 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002494 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002495 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2496 2*(pattern->groups+1));
2497 if (!match)
2498 return NULL;
2499
2500 Py_INCREF(pattern);
2501 match->pattern = pattern;
2502
2503 Py_INCREF(state->string);
2504 match->string = state->string;
2505
2506 match->regs = NULL;
2507 match->groups = pattern->groups+1;
2508
2509 /* fill in group slices */
2510
2511 base = (char*) state->beginning;
2512 n = state->charsize;
2513
2514 match->mark[0] = ((char*) state->start - base) / n;
2515 match->mark[1] = ((char*) state->ptr - base) / n;
2516
2517 for (i = j = 0; i < pattern->groups; i++, j+=2)
2518 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2519 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2520 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2521 } else
2522 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2523
2524 match->pos = state->pos;
2525 match->endpos = state->endpos;
2526
2527 match->lastindex = state->lastindex;
2528
2529 return (PyObject*) match;
2530
2531 } else if (status == 0) {
2532
2533 /* no match */
2534 Py_INCREF(Py_None);
2535 return Py_None;
2536
2537 }
2538
2539 /* internal error */
2540 pattern_error(status);
2541 return NULL;
2542}
2543
2544
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002545/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002546/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002547
2548static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002549scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002550{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002551 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002552 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002553 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002554}
2555
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002556/*[clinic input]
2557_sre.SRE_Scanner.match
2558
2559[clinic start generated code]*/
2560
2561static PyObject *
2562_sre_SRE_Scanner_match_impl(ScannerObject *self)
2563/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002564{
2565 SRE_STATE* state = &self->state;
2566 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002567 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002568
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002569 if (state->start == NULL)
2570 Py_RETURN_NONE;
2571
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002572 state_reset(state);
2573
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002574 state->ptr = state->start;
2575
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002576 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002577 if (PyErr_Occurred())
2578 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002579
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002580 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002581 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002582
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002583 if (status == 0)
2584 state->start = NULL;
2585 else if (state->ptr != state->start)
2586 state->start = state->ptr;
2587 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002588 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002589 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002590 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002591
2592 return match;
2593}
2594
2595
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002596/*[clinic input]
2597_sre.SRE_Scanner.search
2598
2599[clinic start generated code]*/
2600
2601static PyObject *
2602_sre_SRE_Scanner_search_impl(ScannerObject *self)
2603/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002604{
2605 SRE_STATE* state = &self->state;
2606 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002607 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002608
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002609 if (state->start == NULL)
2610 Py_RETURN_NONE;
2611
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002612 state_reset(state);
2613
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002614 state->ptr = state->start;
2615
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002616 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002617 if (PyErr_Occurred())
2618 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002619
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002620 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002621 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002622
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002623 if (status == 0)
2624 state->start = NULL;
2625 else if (state->ptr != state->start)
2626 state->start = state->ptr;
2627 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002628 state->start = (void*) ((char*) state->ptr + state->charsize);
2629 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002630 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002631
2632 return match;
2633}
2634
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002635static PyObject *
2636pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002637{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002638 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002639
2640 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002641 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2642 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002643 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002644 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002645
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002646 /* create search state object */
2647 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2648 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002649 return NULL;
2650 }
2651
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002652 Py_INCREF(self);
2653 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002654
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002655 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002656}
2657
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002658#include "clinic/_sre.c.h"
2659
2660static PyMethodDef pattern_methods[] = {
2661 _SRE_SRE_PATTERN_MATCH_METHODDEF
2662 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2663 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2664 _SRE_SRE_PATTERN_SUB_METHODDEF
2665 _SRE_SRE_PATTERN_SUBN_METHODDEF
2666 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2667 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2668 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2669 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2670 _SRE_SRE_PATTERN___COPY___METHODDEF
2671 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2672 {NULL, NULL}
2673};
2674
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002675static PyGetSetDef pattern_getset[] = {
2676 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2677 "A dictionary mapping group names to group numbers."},
2678 {NULL} /* Sentinel */
2679};
2680
2681#define PAT_OFF(x) offsetof(PatternObject, x)
2682static PyMemberDef pattern_members[] = {
2683 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2684 {"flags", T_INT, PAT_OFF(flags), READONLY},
2685 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2686 {NULL} /* Sentinel */
2687};
2688
2689static PyTypeObject Pattern_Type = {
2690 PyVarObject_HEAD_INIT(NULL, 0)
2691 "_" SRE_MODULE ".SRE_Pattern",
2692 sizeof(PatternObject), sizeof(SRE_CODE),
2693 (destructor)pattern_dealloc, /* tp_dealloc */
2694 0, /* tp_print */
2695 0, /* tp_getattr */
2696 0, /* tp_setattr */
2697 0, /* tp_reserved */
2698 (reprfunc)pattern_repr, /* tp_repr */
2699 0, /* tp_as_number */
2700 0, /* tp_as_sequence */
2701 0, /* tp_as_mapping */
2702 0, /* tp_hash */
2703 0, /* tp_call */
2704 0, /* tp_str */
2705 0, /* tp_getattro */
2706 0, /* tp_setattro */
2707 0, /* tp_as_buffer */
2708 Py_TPFLAGS_DEFAULT, /* tp_flags */
2709 pattern_doc, /* tp_doc */
2710 0, /* tp_traverse */
2711 0, /* tp_clear */
2712 0, /* tp_richcompare */
2713 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2714 0, /* tp_iter */
2715 0, /* tp_iternext */
2716 pattern_methods, /* tp_methods */
2717 pattern_members, /* tp_members */
2718 pattern_getset, /* tp_getset */
2719};
2720
2721
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002722static PyMethodDef match_methods[] = {
2723 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2724 _SRE_SRE_MATCH_START_METHODDEF
2725 _SRE_SRE_MATCH_END_METHODDEF
2726 _SRE_SRE_MATCH_SPAN_METHODDEF
2727 _SRE_SRE_MATCH_GROUPS_METHODDEF
2728 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2729 _SRE_SRE_MATCH_EXPAND_METHODDEF
2730 _SRE_SRE_MATCH___COPY___METHODDEF
2731 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2732 {NULL, NULL}
2733};
2734
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002735static PyGetSetDef match_getset[] = {
2736 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2737 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2738 {"regs", (getter)match_regs_get, (setter)NULL},
2739 {NULL}
2740};
2741
2742#define MATCH_OFF(x) offsetof(MatchObject, x)
2743static PyMemberDef match_members[] = {
2744 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2745 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2746 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2747 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2748 {NULL}
2749};
2750
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2753
2754static PyTypeObject Match_Type = {
2755 PyVarObject_HEAD_INIT(NULL,0)
2756 "_" SRE_MODULE ".SRE_Match",
2757 sizeof(MatchObject), sizeof(Py_ssize_t),
2758 (destructor)match_dealloc, /* tp_dealloc */
2759 0, /* tp_print */
2760 0, /* tp_getattr */
2761 0, /* tp_setattr */
2762 0, /* tp_reserved */
2763 (reprfunc)match_repr, /* tp_repr */
2764 0, /* tp_as_number */
2765 0, /* tp_as_sequence */
2766 0, /* tp_as_mapping */
2767 0, /* tp_hash */
2768 0, /* tp_call */
2769 0, /* tp_str */
2770 0, /* tp_getattro */
2771 0, /* tp_setattro */
2772 0, /* tp_as_buffer */
2773 Py_TPFLAGS_DEFAULT, /* tp_flags */
2774 match_doc, /* tp_doc */
2775 0, /* tp_traverse */
2776 0, /* tp_clear */
2777 0, /* tp_richcompare */
2778 0, /* tp_weaklistoffset */
2779 0, /* tp_iter */
2780 0, /* tp_iternext */
2781 match_methods, /* tp_methods */
2782 match_members, /* tp_members */
2783 match_getset, /* tp_getset */
2784};
2785
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002786static PyMethodDef scanner_methods[] = {
2787 _SRE_SRE_SCANNER_MATCH_METHODDEF
2788 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2789 {NULL, NULL}
2790};
2791
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002792#define SCAN_OFF(x) offsetof(ScannerObject, x)
2793static PyMemberDef scanner_members[] = {
2794 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2795 {NULL} /* Sentinel */
2796};
2797
2798static PyTypeObject Scanner_Type = {
2799 PyVarObject_HEAD_INIT(NULL, 0)
2800 "_" SRE_MODULE ".SRE_Scanner",
2801 sizeof(ScannerObject), 0,
2802 (destructor)scanner_dealloc,/* tp_dealloc */
2803 0, /* tp_print */
2804 0, /* tp_getattr */
2805 0, /* tp_setattr */
2806 0, /* tp_reserved */
2807 0, /* tp_repr */
2808 0, /* tp_as_number */
2809 0, /* tp_as_sequence */
2810 0, /* tp_as_mapping */
2811 0, /* tp_hash */
2812 0, /* tp_call */
2813 0, /* tp_str */
2814 0, /* tp_getattro */
2815 0, /* tp_setattro */
2816 0, /* tp_as_buffer */
2817 Py_TPFLAGS_DEFAULT, /* tp_flags */
2818 0, /* tp_doc */
2819 0, /* tp_traverse */
2820 0, /* tp_clear */
2821 0, /* tp_richcompare */
2822 0, /* tp_weaklistoffset */
2823 0, /* tp_iter */
2824 0, /* tp_iternext */
2825 scanner_methods, /* tp_methods */
2826 scanner_members, /* tp_members */
2827 0, /* tp_getset */
2828};
2829
Guido van Rossumb700df92000-03-31 14:59:30 +00002830static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002831 _SRE_COMPILE_METHODDEF
2832 _SRE_GETCODESIZE_METHODDEF
2833 _SRE_GETLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002834 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002835};
2836
Martin v. Löwis1a214512008-06-11 05:26:20 +00002837static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002838 PyModuleDef_HEAD_INIT,
2839 "_" SRE_MODULE,
2840 NULL,
2841 -1,
2842 _functions,
2843 NULL,
2844 NULL,
2845 NULL,
2846 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002847};
2848
2849PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002850{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002851 PyObject* m;
2852 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002853 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002854
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002855 /* Patch object types */
2856 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2857 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002858 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002859
Martin v. Löwis1a214512008-06-11 05:26:20 +00002860 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002861 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002862 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002863 d = PyModule_GetDict(m);
2864
Christian Heimes217cfd12007-12-02 14:31:20 +00002865 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002866 if (x) {
2867 PyDict_SetItemString(d, "MAGIC", x);
2868 Py_DECREF(x);
2869 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002870
Christian Heimes217cfd12007-12-02 14:31:20 +00002871 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002872 if (x) {
2873 PyDict_SetItemString(d, "CODESIZE", x);
2874 Py_DECREF(x);
2875 }
2876
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002877 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2878 if (x) {
2879 PyDict_SetItemString(d, "MAXREPEAT", x);
2880 Py_DECREF(x);
2881 }
2882
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002883 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2884 if (x) {
2885 PyDict_SetItemString(d, "MAXGROUPS", x);
2886 Py_DECREF(x);
2887 }
2888
Neal Norwitzfe537132007-08-26 03:55:15 +00002889 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002890 if (x) {
2891 PyDict_SetItemString(d, "copyright", x);
2892 Py_DECREF(x);
2893 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002894 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002895}
2896
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002897/* vim:ts=4:sw=4:et
2898*/