/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */

/* copyright/version banner for this engine */
static char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";

Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
/* name of this module, minus the leading underscore */
#if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif

#define SRE_PY_MODULE "re"

/* defining this one enables tracing */
#undef VERBOSE

/* -------------------------------------------------------------------- */
/* optional features */

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY

/* -------------------------------------------------------------------- */

/* LOCAL(type) declares a file-local function returning `type`, using the
   cheapest calling convention/inlining the compiler is known to offer. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */

/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and expands
   to nothing otherwise; note the doubled parentheses at the call site. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif

/* -------------------------------------------------------------------- */
/* search engine state */

/* ASCII-only character predicates: values of 128 and above never match,
   and only '\n' counts as a line break.  Each macro may evaluate its
   argument more than once, so pass expressions without side effects. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch)\
    ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))

/* Lower-case `ch` with Py_TOLOWER when it is below 128; any other value
   is returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
}

/* Upper-case `ch` with Py_TOUPPER when it is below 128; any other value
   is returned unchanged. */
static unsigned int sre_upper(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOUPPER(ch) : ch);
}

/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Lower-case `ch` with the C library's tolower() (which follows the
   current locale) when it is below 256; larger values are returned
   unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}

/* Upper-case `ch` with the C library's toupper() (which follows the
   current locale) when it is below 256; larger values are returned
   unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
}

/* unicode-specific character predicates */

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')

/* Lower-case `ch` using Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER(ch);
}

/* Upper-case `ch` using Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOUPPER(ch);
}

Guido van Rossumb700df92000-03-31 14:59:30 +0000155LOCAL(int)
156sre_category(SRE_CODE category, unsigned int ch)
157{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000158 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000160 case SRE_CATEGORY_DIGIT:
161 return SRE_IS_DIGIT(ch);
162 case SRE_CATEGORY_NOT_DIGIT:
163 return !SRE_IS_DIGIT(ch);
164 case SRE_CATEGORY_SPACE:
165 return SRE_IS_SPACE(ch);
166 case SRE_CATEGORY_NOT_SPACE:
167 return !SRE_IS_SPACE(ch);
168 case SRE_CATEGORY_WORD:
169 return SRE_IS_WORD(ch);
170 case SRE_CATEGORY_NOT_WORD:
171 return !SRE_IS_WORD(ch);
172 case SRE_CATEGORY_LINEBREAK:
173 return SRE_IS_LINEBREAK(ch);
174 case SRE_CATEGORY_NOT_LINEBREAK:
175 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000176
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000177 case SRE_CATEGORY_LOC_WORD:
178 return SRE_LOC_IS_WORD(ch);
179 case SRE_CATEGORY_LOC_NOT_WORD:
180 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000182 case SRE_CATEGORY_UNI_DIGIT:
183 return SRE_UNI_IS_DIGIT(ch);
184 case SRE_CATEGORY_UNI_NOT_DIGIT:
185 return !SRE_UNI_IS_DIGIT(ch);
186 case SRE_CATEGORY_UNI_SPACE:
187 return SRE_UNI_IS_SPACE(ch);
188 case SRE_CATEGORY_UNI_NOT_SPACE:
189 return !SRE_UNI_IS_SPACE(ch);
190 case SRE_CATEGORY_UNI_WORD:
191 return SRE_UNI_IS_WORD(ch);
192 case SRE_CATEGORY_UNI_NOT_WORD:
193 return !SRE_UNI_IS_WORD(ch);
194 case SRE_CATEGORY_UNI_LINEBREAK:
195 return SRE_UNI_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
197 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000198 }
199 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000200}
201
202/* helpers */
203
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000204static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000205data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000206{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000207 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000208 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000209 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000210 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000211 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000212}
213
214static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000215data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000217 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000218 minsize = state->data_stack_base+size;
219 cursize = state->data_stack_size;
220 if (cursize < minsize) {
221 void* stack;
222 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300223 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000224 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000226 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000227 return SRE_ERROR_MEMORY;
228 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000229 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000230 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000231 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000232 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000233}
234
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000235/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000236
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300237#define SRE_CHAR Py_UCS1
238#define SIZEOF_SRE_CHAR 1
239#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300240#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000241
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300242/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000243
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300244#define SRE_CHAR Py_UCS2
245#define SIZEOF_SRE_CHAR 2
246#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300247#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000248
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300249/* generate 32-bit unicode version */
250
251#define SRE_CHAR Py_UCS4
252#define SIZEOF_SRE_CHAR 4
253#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300254#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000255
256/* -------------------------------------------------------------------- */
257/* factories and destructors */
258
259/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100260static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300261static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000262
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300263
264/*[clinic input]
265module _sre
266class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
267class _sre.SRE_Match "MatchObject *" "&Match_Type"
268class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
269[clinic start generated code]*/
270/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
271
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700272static PyTypeObject Pattern_Type;
273static PyTypeObject Match_Type;
274static PyTypeObject Scanner_Type;
275
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300276/*[clinic input]
277_sre.getcodesize -> int
278[clinic start generated code]*/
279
280static int
281_sre_getcodesize_impl(PyModuleDef *module)
282/*[clinic end generated code: output=794f1f98ef4883e5 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000283{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300284 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000285}
286
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300287/*[clinic input]
288_sre.getlower -> int
289
290 character: int
291 flags: int
292 /
293
294[clinic start generated code]*/
295
296static int
297_sre_getlower_impl(PyModuleDef *module, int character, int flags)
298/*[clinic end generated code: output=5fc3616ae2a4c306 input=087d2f1c44bbca6f]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000299{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000300 if (flags & SRE_FLAG_LOCALE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300301 return sre_lower_locale(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000302 if (flags & SRE_FLAG_UNICODE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300303 return sre_lower_unicode(character);
304 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000305}
306
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307LOCAL(void)
308state_reset(SRE_STATE* state)
309{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000310 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000311 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000312
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000313 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000314 state->lastindex = -1;
315
316 state->repeat = NULL;
317
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000318 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000319}
320
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000321static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300323 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600324 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000325{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000326 /* given a python object, return a data pointer, a length (in
327 characters), and a character size. return NULL if the object
328 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000329
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000330 /* Unicode objects do not support the buffer API. So, get the data
331 directly instead. */
332 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333 if (PyUnicode_READY(string) == -1)
334 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200336 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300337 *p_isbytes = 0;
338 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000339 }
340
Victor Stinner0058b862011-09-29 03:27:47 +0200341 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300342 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200343 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000345 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000346
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300347 *p_length = view->len;
348 *p_charsize = 1;
349 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000350
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300351 if (view->buf == NULL) {
352 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
353 PyBuffer_Release(view);
354 view->buf = NULL;
355 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000356 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300357 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000358}
359
360LOCAL(PyObject*)
361state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000362 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000363{
364 /* prepare state object */
365
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000366 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300367 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000368 void* ptr;
369
370 memset(state, 0, sizeof(SRE_STATE));
371
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300372 state->mark = PyMem_New(void *, pattern->groups * 2);
373 if (!state->mark) {
374 PyErr_NoMemory();
375 goto err;
376 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000377 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000378 state->lastindex = -1;
379
Benjamin Petersone48944b2012-03-07 14:50:25 -0600380 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300381 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000382 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600383 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000384
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300385 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600386 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200387 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600388 goto err;
389 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300390 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600391 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200392 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600393 goto err;
394 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000395
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000396 /* adjust boundaries */
397 if (start < 0)
398 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000399 else if (start > length)
400 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000401
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000402 if (end < 0)
403 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000404 else if (end > length)
405 end = length;
406
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300407 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000408 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000409
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000410 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000411
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000412 state->start = (void*) ((char*) ptr + start * state->charsize);
413 state->end = (void*) ((char*) ptr + end * state->charsize);
414
415 Py_INCREF(string);
416 state->string = string;
417 state->pos = start;
418 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000419
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200420 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000421 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200422 state->upper = sre_upper_locale;
423 }
424 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000425 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200426 state->upper = sre_upper_unicode;
427 }
428 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000429 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200430 state->upper = sre_upper;
431 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000432
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000433 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600434 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300435 PyMem_Del(state->mark);
436 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600437 if (state->buffer.buf)
438 PyBuffer_Release(&state->buffer);
439 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000440}
441
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000442LOCAL(void)
443state_fini(SRE_STATE* state)
444{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600445 if (state->buffer.buf)
446 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000447 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000448 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300449 PyMem_Del(state->mark);
450 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000451}
452
/* calculate offset from start of string: converts the pointer `member`
   into a character index relative to (state)->beginning */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)

Fredrik Lundh75f2d672000-06-29 11:34:28 +0000457LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300458getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300459 PyObject* string, Py_ssize_t start, Py_ssize_t end)
460{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300461 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300462 if (PyBytes_CheckExact(string) &&
463 start == 0 && end == PyBytes_GET_SIZE(string)) {
464 Py_INCREF(string);
465 return string;
466 }
467 return PyBytes_FromStringAndSize(
468 (const char *)ptr + start, end - start);
469 }
470 else {
471 return PyUnicode_Substring(string, start, end);
472 }
473}
474
475LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000476state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000477{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000478 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000479
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000480 index = (index - 1) * 2;
481
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000482 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000483 if (empty)
484 /* want empty string */
485 i = j = 0;
486 else {
487 Py_INCREF(Py_None);
488 return Py_None;
489 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000490 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000491 i = STATE_OFFSET(state, state->mark[index]);
492 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000493 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000494
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300495 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000496}
497
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000498static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100499pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000500{
501 switch (status) {
502 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400503 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000504 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400505 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000506 "maximum recursion limit exceeded"
507 );
508 break;
509 case SRE_ERROR_MEMORY:
510 PyErr_NoMemory();
511 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000512 case SRE_ERROR_INTERRUPTED:
513 /* An exception has already been raised, so let it fly */
514 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000515 default:
516 /* other error codes indicate compiler/engine bugs */
517 PyErr_SetString(
518 PyExc_RuntimeError,
519 "internal error in regular expression engine"
520 );
521 }
522}
523
Guido van Rossumb700df92000-03-31 14:59:30 +0000524static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000525pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000526{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000527 if (self->weakreflist != NULL)
528 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000529 Py_XDECREF(self->pattern);
530 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000531 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000532 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000533}
534
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300535LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300536sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300537{
538 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300539 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300540 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300541 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300542 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300543 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300544}
545
546LOCAL(Py_ssize_t)
547sre_search(SRE_STATE* state, SRE_CODE* pattern)
548{
549 if (state->charsize == 1)
550 return sre_ucs1_search(state, pattern);
551 if (state->charsize == 2)
552 return sre_ucs2_search(state, pattern);
553 assert(state->charsize == 4);
554 return sre_ucs4_search(state, pattern);
555}
556
Larry Hastings16c51912014-01-07 11:53:01 -0800557static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200558fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
559{
560 if (string2 != NULL) {
561 if (string != NULL) {
562 PyErr_Format(PyExc_TypeError,
563 "Argument given by name ('%s') and position (1)",
564 oldname);
565 return NULL;
566 }
567 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
568 "The '%s' keyword parameter name is deprecated. "
569 "Use 'string' instead.", oldname) < 0)
570 return NULL;
571 return string2;
572 }
573 if (string == NULL) {
574 PyErr_SetString(PyExc_TypeError,
575 "Required argument 'string' (pos 1) not found");
576 return NULL;
577 }
578 return string;
579}
Larry Hastings16c51912014-01-07 11:53:01 -0800580
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300581/*[clinic input]
582_sre.SRE_Pattern.match
583
584 string: object = NULL
585 pos: Py_ssize_t = 0
586 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
587 *
588 pattern: object = NULL
589
590Matches zero or more characters at the beginning of the string.
591[clinic start generated code]*/
592
Larry Hastings16c51912014-01-07 11:53:01 -0800593static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300594_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
595 Py_ssize_t pos, Py_ssize_t endpos,
596 PyObject *pattern)
597/*[clinic end generated code: output=74b4b1da3bb2d84e input=3d079aa99979b81d]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800598{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000599 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100600 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300601 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000602
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200603 string = fix_string_param(string, pattern, "pattern");
604 if (!string)
605 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300606 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000607 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000608
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000609 state.ptr = state.start;
610
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000611 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
612
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300613 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000614
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000615 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300616 if (PyErr_Occurred()) {
617 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000618 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300619 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000620
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300621 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000622 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300623 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000624}
625
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300626/*[clinic input]
627_sre.SRE_Pattern.fullmatch
628
629 string: object = NULL
630 pos: Py_ssize_t = 0
631 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
632 *
633 pattern: object = NULL
634
635Matches against all of the string
636[clinic start generated code]*/
637
638static PyObject *
639_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
640 Py_ssize_t pos, Py_ssize_t endpos,
641 PyObject *pattern)
642/*[clinic end generated code: output=1c98bc5da744ea94 input=d4228606cc12580f]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200643{
644 SRE_STATE state;
645 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300646 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200647
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300648 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200649 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200650 return NULL;
651
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300652 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200653 return NULL;
654
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200655 state.ptr = state.start;
656
657 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
658
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300659 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200660
661 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300662 if (PyErr_Occurred()) {
663 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200664 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300665 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200666
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300667 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200668 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300669 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200670}
671
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300672/*[clinic input]
673_sre.SRE_Pattern.search
674
675 string: object = NULL
676 pos: Py_ssize_t = 0
677 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
678 *
679 pattern: object = NULL
680
681Scan through string looking for a match, and return a corresponding match object instance.
682
683Return None if no position in the string matches.
684[clinic start generated code]*/
685
686static PyObject *
687_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
688 Py_ssize_t pos, Py_ssize_t endpos,
689 PyObject *pattern)
690/*[clinic end generated code: output=3839394a18e5ea4f input=dab42720f4be3a4b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000691{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000692 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100693 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300694 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000695
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300696 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200697 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000698 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000699
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300700 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000701 return NULL;
702
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000703 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
704
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300705 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000706
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000707 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
708
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300709 if (PyErr_Occurred()) {
710 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000711 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300712 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000713
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300714 match = pattern_new_match(self, &state, status);
715 state_fini(&state);
716 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000717}
718
719static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000720call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721{
722 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000723 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000724 PyObject* func;
725 PyObject* result;
726
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000727 if (!args)
728 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000729 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000730 if (!name)
731 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000732 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000733 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000734 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000735 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000736 func = PyObject_GetAttrString(mod, function);
737 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000738 if (!func)
739 return NULL;
740 result = PyObject_CallObject(func, args);
741 Py_DECREF(func);
742 Py_DECREF(args);
743 return result;
744}
745
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000746#ifdef USE_BUILTIN_COPY
747static int
748deepcopy(PyObject** object, PyObject* memo)
749{
750 PyObject* copy;
751
752 copy = call(
753 "copy", "deepcopy",
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000754 PyTuple_Pack(2, *object, memo)
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000755 );
756 if (!copy)
757 return 0;
758
Serhiy Storchaka5a57ade2015-12-24 10:35:59 +0200759 Py_SETREF(*object, copy);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000760
761 return 1; /* success */
762}
763#endif
764
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300765/*[clinic input]
766_sre.SRE_Pattern.findall
767
768 string: object = NULL
769 pos: Py_ssize_t = 0
770 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
771 *
772 source: object = NULL
773
774Return a list of all non-overlapping matches of pattern in string.
775[clinic start generated code]*/
776
777static PyObject *
778_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
779 Py_ssize_t pos, Py_ssize_t endpos,
780 PyObject *source)
781/*[clinic end generated code: output=51295498b300639d input=df688355c056b9de]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000782{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000783 SRE_STATE state;
784 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100785 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000786 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000787
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300788 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200789 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000790 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000791
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300792 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000793 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000794
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000795 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000796 if (!list) {
797 state_fini(&state);
798 return NULL;
799 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000800
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000801 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000802
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000803 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000804
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000805 state_reset(&state);
806
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000807 state.ptr = state.start;
808
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300809 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300810 if (PyErr_Occurred())
811 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000812
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000813 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000814 if (status == 0)
815 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000816 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000817 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000818 }
Tim Peters3d563502006-01-21 02:47:53 +0000819
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000820 /* don't bother to build a match object */
821 switch (self->groups) {
822 case 0:
823 b = STATE_OFFSET(&state, state.start);
824 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300825 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300826 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000827 if (!item)
828 goto error;
829 break;
830 case 1:
831 item = state_getslice(&state, 1, string, 1);
832 if (!item)
833 goto error;
834 break;
835 default:
836 item = PyTuple_New(self->groups);
837 if (!item)
838 goto error;
839 for (i = 0; i < self->groups; i++) {
840 PyObject* o = state_getslice(&state, i+1, string, 1);
841 if (!o) {
842 Py_DECREF(item);
843 goto error;
844 }
845 PyTuple_SET_ITEM(item, i, o);
846 }
847 break;
848 }
849
850 status = PyList_Append(list, item);
851 Py_DECREF(item);
852 if (status < 0)
853 goto error;
854
855 if (state.ptr == state.start)
856 state.start = (void*) ((char*) state.ptr + state.charsize);
857 else
858 state.start = state.ptr;
859
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000860 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000861
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000862 state_fini(&state);
863 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000864
865error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000866 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000867 state_fini(&state);
868 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000869
Guido van Rossumb700df92000-03-31 14:59:30 +0000870}
871
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300872/*[clinic input]
873_sre.SRE_Pattern.finditer
874
875 string: object
876 pos: Py_ssize_t = 0
877 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
878
879Return an iterator over all non-overlapping matches for the RE pattern in string.
880
881For each match, the iterator returns a match object.
882[clinic start generated code]*/
883
884static PyObject *
885_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
886 Py_ssize_t pos, Py_ssize_t endpos)
887/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000888{
889 PyObject* scanner;
890 PyObject* search;
891 PyObject* iterator;
892
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300893 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000894 if (!scanner)
895 return NULL;
896
897 search = PyObject_GetAttrString(scanner, "search");
898 Py_DECREF(scanner);
899 if (!search)
900 return NULL;
901
902 iterator = PyCallIter_New(search, Py_None);
903 Py_DECREF(search);
904
905 return iterator;
906}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000907
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300908/*[clinic input]
909_sre.SRE_Pattern.scanner
910
911 string: object
912 pos: Py_ssize_t = 0
913 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
914
915[clinic start generated code]*/
916
917static PyObject *
918_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
919 Py_ssize_t pos, Py_ssize_t endpos)
920/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
921{
922 return pattern_scanner(self, string, pos, endpos);
923}
924
925/*[clinic input]
926_sre.SRE_Pattern.split
927
928 string: object = NULL
929 maxsplit: Py_ssize_t = 0
930 *
931 source: object = NULL
932
933Split string by the occurrences of pattern.
934[clinic start generated code]*/
935
936static PyObject *
937_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
938 Py_ssize_t maxsplit, PyObject *source)
939/*[clinic end generated code: output=20bac2ff55b9f84c input=41e0b2e35e599d7b]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000940{
941 SRE_STATE state;
942 PyObject* list;
943 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100944 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000945 Py_ssize_t n;
946 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000947 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000948
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300949 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200950 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000951 return NULL;
952
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200953 assert(self->codesize != 0);
954 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
955 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
956 PyErr_SetString(PyExc_ValueError,
957 "split() requires a non-empty pattern match.");
958 return NULL;
959 }
960 if (PyErr_WarnEx(PyExc_FutureWarning,
961 "split() requires a non-empty pattern match.",
962 1) < 0)
963 return NULL;
964 }
965
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300966 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000967 return NULL;
968
969 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000970 if (!list) {
971 state_fini(&state);
972 return NULL;
973 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000974
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000975 n = 0;
976 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000977
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000978 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000979
980 state_reset(&state);
981
982 state.ptr = state.start;
983
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300984 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300985 if (PyErr_Occurred())
986 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000987
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000988 if (status <= 0) {
989 if (status == 0)
990 break;
991 pattern_error(status);
992 goto error;
993 }
Tim Peters3d563502006-01-21 02:47:53 +0000994
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000995 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300996 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000997 break;
998 /* skip one character */
999 state.start = (void*) ((char*) state.ptr + state.charsize);
1000 continue;
1001 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001002
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001003 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001004 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001005 string, STATE_OFFSET(&state, last),
1006 STATE_OFFSET(&state, state.start)
1007 );
1008 if (!item)
1009 goto error;
1010 status = PyList_Append(list, item);
1011 Py_DECREF(item);
1012 if (status < 0)
1013 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001014
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001015 /* add groups (if any) */
1016 for (i = 0; i < self->groups; i++) {
1017 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001018 if (!item)
1019 goto error;
1020 status = PyList_Append(list, item);
1021 Py_DECREF(item);
1022 if (status < 0)
1023 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001024 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001025
1026 n = n + 1;
1027
1028 last = state.start = state.ptr;
1029
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001030 }
1031
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001032 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001033 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001034 string, STATE_OFFSET(&state, last), state.endpos
1035 );
1036 if (!item)
1037 goto error;
1038 status = PyList_Append(list, item);
1039 Py_DECREF(item);
1040 if (status < 0)
1041 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001042
1043 state_fini(&state);
1044 return list;
1045
1046error:
1047 Py_DECREF(list);
1048 state_fini(&state);
1049 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001050
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001051}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001052
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001053static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001054pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001055 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001056{
1057 SRE_STATE state;
1058 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001059 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001060 PyObject* item;
1061 PyObject* filter;
1062 PyObject* args;
1063 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001064 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001065 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001066 Py_ssize_t n;
1067 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001068 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001069 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001070 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001071
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001072 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001073 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001074 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001075 Py_INCREF(filter);
1076 filter_is_callable = 1;
1077 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001078 /* if not callable, check if it's a literal string */
1079 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001080 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001081 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001082 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001083 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001084 if (charsize == 1)
1085 literal = memchr(ptr, '\\', n) == NULL;
1086 else
1087 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001088 } else {
1089 PyErr_Clear();
1090 literal = 0;
1091 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001092 if (view.buf)
1093 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001094 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001095 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001096 Py_INCREF(filter);
1097 filter_is_callable = 0;
1098 } else {
1099 /* not a literal; hand it over to the template compiler */
1100 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001101 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001102 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001103 );
1104 if (!filter)
1105 return NULL;
1106 filter_is_callable = PyCallable_Check(filter);
1107 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001108 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001109
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001110 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001111 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001112 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001113 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001114
1115 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001116 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001117 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001118 state_fini(&state);
1119 return NULL;
1120 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001121
1122 n = i = 0;
1123
1124 while (!count || n < count) {
1125
1126 state_reset(&state);
1127
1128 state.ptr = state.start;
1129
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001130 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001131 if (PyErr_Occurred())
1132 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001133
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001134 if (status <= 0) {
1135 if (status == 0)
1136 break;
1137 pattern_error(status);
1138 goto error;
1139 }
Tim Peters3d563502006-01-21 02:47:53 +00001140
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001141 b = STATE_OFFSET(&state, state.start);
1142 e = STATE_OFFSET(&state, state.ptr);
1143
1144 if (i < b) {
1145 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001146 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001147 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001148 if (!item)
1149 goto error;
1150 status = PyList_Append(list, item);
1151 Py_DECREF(item);
1152 if (status < 0)
1153 goto error;
1154
1155 } else if (i == b && i == e && n > 0)
1156 /* ignore empty match on latest position */
1157 goto next;
1158
1159 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001160 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001161 match = pattern_new_match(self, &state, 1);
1162 if (!match)
1163 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001164 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001165 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001166 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001167 goto error;
1168 }
1169 item = PyObject_CallObject(filter, args);
1170 Py_DECREF(args);
1171 Py_DECREF(match);
1172 if (!item)
1173 goto error;
1174 } else {
1175 /* filter is literal string */
1176 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001177 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001178 }
1179
1180 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001181 if (item != Py_None) {
1182 status = PyList_Append(list, item);
1183 Py_DECREF(item);
1184 if (status < 0)
1185 goto error;
1186 }
Tim Peters3d563502006-01-21 02:47:53 +00001187
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001188 i = e;
1189 n = n + 1;
1190
1191next:
1192 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001193 if (state.ptr == state.end)
1194 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001195 if (state.ptr == state.start)
1196 state.start = (void*) ((char*) state.ptr + state.charsize);
1197 else
1198 state.start = state.ptr;
1199
1200 }
1201
1202 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001203 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001204 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001205 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001206 if (!item)
1207 goto error;
1208 status = PyList_Append(list, item);
1209 Py_DECREF(item);
1210 if (status < 0)
1211 goto error;
1212 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001213
1214 state_fini(&state);
1215
Guido van Rossum4e173842001-12-07 04:25:10 +00001216 Py_DECREF(filter);
1217
Fredrik Lundhdac58492001-10-21 21:48:30 +00001218 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001219 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001220 if (!joiner) {
1221 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001222 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001223 }
1224 if (PyList_GET_SIZE(list) == 0) {
1225 Py_DECREF(list);
1226 item = joiner;
1227 }
1228 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001229 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001230 item = _PyBytes_Join(joiner, list);
1231 else
1232 item = PyUnicode_Join(joiner, list);
1233 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001234 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001235 if (!item)
1236 return NULL;
1237 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001238
1239 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001240 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001241
1242 return item;
1243
1244error:
1245 Py_DECREF(list);
1246 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001247 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001248 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001249
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001250}
1251
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001252/*[clinic input]
1253_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001254
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001255 repl: object
1256 string: object
1257 count: Py_ssize_t = 0
1258
1259Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1260[clinic start generated code]*/
1261
1262static PyObject *
1263_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1264 PyObject *string, Py_ssize_t count)
1265/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1266{
1267 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001268}
1269
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001270/*[clinic input]
1271_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001272
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001273 repl: object
1274 string: object
1275 count: Py_ssize_t = 0
1276
1277Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1278[clinic start generated code]*/
1279
1280static PyObject *
1281_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1282 PyObject *string, Py_ssize_t count)
1283/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1284{
1285 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001286}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001287
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001288/*[clinic input]
1289_sre.SRE_Pattern.__copy__
1290
1291[clinic start generated code]*/
1292
1293static PyObject *
1294_sre_SRE_Pattern___copy___impl(PatternObject *self)
1295/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001296{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001297#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001298 PatternObject* copy;
1299 int offset;
1300
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001301 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1302 if (!copy)
1303 return NULL;
1304
1305 offset = offsetof(PatternObject, groups);
1306
1307 Py_XINCREF(self->groupindex);
1308 Py_XINCREF(self->indexgroup);
1309 Py_XINCREF(self->pattern);
1310
1311 memcpy((char*) copy + offset, (char*) self + offset,
1312 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001313 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001314
1315 return (PyObject*) copy;
1316#else
1317 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1318 return NULL;
1319#endif
1320}
1321
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001322/*[clinic input]
1323_sre.SRE_Pattern.__deepcopy__
1324
1325 memo: object
1326
1327[clinic start generated code]*/
1328
1329static PyObject *
1330_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1331/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001332{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001333#ifdef USE_BUILTIN_COPY
1334 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001335
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001336 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001337 if (!copy)
1338 return NULL;
1339
1340 if (!deepcopy(&copy->groupindex, memo) ||
1341 !deepcopy(&copy->indexgroup, memo) ||
1342 !deepcopy(&copy->pattern, memo)) {
1343 Py_DECREF(copy);
1344 return NULL;
1345 }
1346
1347#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001348 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1349 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001350#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001351}
1352
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001353static PyObject *
1354pattern_repr(PatternObject *obj)
1355{
1356 static const struct {
1357 const char *name;
1358 int value;
1359 } flag_names[] = {
1360 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1361 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1362 {"re.LOCALE", SRE_FLAG_LOCALE},
1363 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1364 {"re.DOTALL", SRE_FLAG_DOTALL},
1365 {"re.UNICODE", SRE_FLAG_UNICODE},
1366 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1367 {"re.DEBUG", SRE_FLAG_DEBUG},
1368 {"re.ASCII", SRE_FLAG_ASCII},
1369 };
1370 PyObject *result = NULL;
1371 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001372 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001373 int flags = obj->flags;
1374
1375 /* Omit re.UNICODE for valid string patterns. */
1376 if (obj->isbytes == 0 &&
1377 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1378 SRE_FLAG_UNICODE)
1379 flags &= ~SRE_FLAG_UNICODE;
1380
1381 flag_items = PyList_New(0);
1382 if (!flag_items)
1383 return NULL;
1384
1385 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1386 if (flags & flag_names[i].value) {
1387 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1388 if (!item)
1389 goto done;
1390
1391 if (PyList_Append(flag_items, item) < 0) {
1392 Py_DECREF(item);
1393 goto done;
1394 }
1395 Py_DECREF(item);
1396 flags &= ~flag_names[i].value;
1397 }
1398 }
1399 if (flags) {
1400 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1401 if (!item)
1402 goto done;
1403
1404 if (PyList_Append(flag_items, item) < 0) {
1405 Py_DECREF(item);
1406 goto done;
1407 }
1408 Py_DECREF(item);
1409 }
1410
1411 if (PyList_Size(flag_items) > 0) {
1412 PyObject *flags_result;
1413 PyObject *sep = PyUnicode_FromString("|");
1414 if (!sep)
1415 goto done;
1416 flags_result = PyUnicode_Join(sep, flag_items);
1417 Py_DECREF(sep);
1418 if (!flags_result)
1419 goto done;
1420 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1421 obj->pattern, flags_result);
1422 Py_DECREF(flags_result);
1423 }
1424 else {
1425 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1426 }
1427
1428done:
1429 Py_DECREF(flag_items);
1430 return result;
1431}
1432
Raymond Hettinger94478742004-09-24 04:31:19 +00001433PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1434
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001435/* PatternObject's 'groupindex' method. */
1436static PyObject *
1437pattern_groupindex(PatternObject *self)
1438{
1439 return PyDictProxy_New(self->groupindex);
1440}
1441
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001442static int _validate(PatternObject *self); /* Forward */
1443
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001444/*[clinic input]
1445_sre.compile
1446
1447 pattern: object
1448 flags: int
1449 code: object(subclass_of='&PyList_Type')
1450 groups: Py_ssize_t
1451 groupindex: object
1452 indexgroup: object
1453
1454[clinic start generated code]*/
1455
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001456static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001457_sre_compile_impl(PyModuleDef *module, PyObject *pattern, int flags,
1458 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1459 PyObject *indexgroup)
1460/*[clinic end generated code: output=3004b293730bf309 input=7d059ec8ae1edb85]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001461{
1462 /* "compile" pattern descriptor to pattern object */
1463
1464 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001465 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001466
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001467 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001468 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001469 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1470 if (!self)
1471 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001472 self->weakreflist = NULL;
1473 self->pattern = NULL;
1474 self->groupindex = NULL;
1475 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001476
1477 self->codesize = n;
1478
1479 for (i = 0; i < n; i++) {
1480 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001481 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001482 self->code[i] = (SRE_CODE) value;
1483 if ((unsigned long) self->code[i] != value) {
1484 PyErr_SetString(PyExc_OverflowError,
1485 "regular expression code size limit exceeded");
1486 break;
1487 }
1488 }
1489
1490 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001491 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001492 return NULL;
1493 }
1494
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001495 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001496 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001497 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001498 else {
1499 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001500 int charsize;
1501 Py_buffer view;
1502 view.buf = NULL;
1503 if (!getstring(pattern, &p_length, &self->isbytes,
1504 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001505 Py_DECREF(self);
1506 return NULL;
1507 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001508 if (view.buf)
1509 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001510 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001511
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001512 Py_INCREF(pattern);
1513 self->pattern = pattern;
1514
1515 self->flags = flags;
1516
1517 self->groups = groups;
1518
1519 Py_XINCREF(groupindex);
1520 self->groupindex = groupindex;
1521
1522 Py_XINCREF(indexgroup);
1523 self->indexgroup = indexgroup;
1524
1525 self->weakreflist = NULL;
1526
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001527 if (!_validate(self)) {
1528 Py_DECREF(self);
1529 return NULL;
1530 }
1531
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001532 return (PyObject*) self;
1533}
1534
Guido van Rossumb700df92000-03-31 14:59:30 +00001535/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001536/* Code validation */
1537
1538/* To learn more about this code, have a look at the _compile() function in
1539 Lib/sre_compile.py. The validation functions below check the code array
1540 for conformance with the code patterns generated there.
1541
1542 The nice thing about the generated code is that it is position-independent:
1543 all jumps are relative jumps forward. Also, jumps don't cross each other:
1544 the target of a later jump is always earlier than the target of an earlier
1545 jump. IOW, this is okay:
1546
1547 J---------J-------T--------T
1548 \ \_____/ /
1549 \______________________/
1550
1551 but this is not:
1552
1553 J---------J-------T--------T
1554 \_________\_____/ /
1555 \____________/
1556
Serhiy Storchakaefa5a392013-10-27 08:04:58 +02001557 It also helps that SRE_CODE is always an unsigned type.
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001558*/
1559
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the *enclosing function*. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros expect the following names in the enclosing scope:
   `code` (read cursor, advanced by one word), `end` (one past the last
   readable word) and the destination variable `op`, `arg` or `skip`.
   Each of them FAILs when the cursor is already at or past `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL if (skip - adj) exceeds the number of words
   remaining from the skip word itself up to `end`.  `adj` is parenthesized
   so that an expression argument cannot change the meaning of the
   subtraction. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001600
1601static int
1602_validate_charset(SRE_CODE *code, SRE_CODE *end)
1603{
1604 /* Some variables are manipulated by the macros above */
1605 SRE_CODE op;
1606 SRE_CODE arg;
1607 SRE_CODE offset;
1608 int i;
1609
1610 while (code < end) {
1611 GET_OP;
1612 switch (op) {
1613
1614 case SRE_OP_NEGATE:
1615 break;
1616
1617 case SRE_OP_LITERAL:
1618 GET_ARG;
1619 break;
1620
1621 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001622 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001623 GET_ARG;
1624 GET_ARG;
1625 break;
1626
1627 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001628 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001629 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001630 FAIL;
1631 code += offset;
1632 break;
1633
1634 case SRE_OP_BIGCHARSET:
1635 GET_ARG; /* Number of blocks */
1636 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001637 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001638 FAIL;
1639 /* Make sure that each byte points to a valid block */
1640 for (i = 0; i < 256; i++) {
1641 if (((unsigned char *)code)[i] >= arg)
1642 FAIL;
1643 }
1644 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001645 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001646 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001647 FAIL;
1648 code += offset;
1649 break;
1650
1651 case SRE_OP_CATEGORY:
1652 GET_ARG;
1653 switch (arg) {
1654 case SRE_CATEGORY_DIGIT:
1655 case SRE_CATEGORY_NOT_DIGIT:
1656 case SRE_CATEGORY_SPACE:
1657 case SRE_CATEGORY_NOT_SPACE:
1658 case SRE_CATEGORY_WORD:
1659 case SRE_CATEGORY_NOT_WORD:
1660 case SRE_CATEGORY_LINEBREAK:
1661 case SRE_CATEGORY_NOT_LINEBREAK:
1662 case SRE_CATEGORY_LOC_WORD:
1663 case SRE_CATEGORY_LOC_NOT_WORD:
1664 case SRE_CATEGORY_UNI_DIGIT:
1665 case SRE_CATEGORY_UNI_NOT_DIGIT:
1666 case SRE_CATEGORY_UNI_SPACE:
1667 case SRE_CATEGORY_UNI_NOT_SPACE:
1668 case SRE_CATEGORY_UNI_WORD:
1669 case SRE_CATEGORY_UNI_NOT_WORD:
1670 case SRE_CATEGORY_UNI_LINEBREAK:
1671 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1672 break;
1673 default:
1674 FAIL;
1675 }
1676 break;
1677
1678 default:
1679 FAIL;
1680
1681 }
1682 }
1683
1684 return 1;
1685}
1686
/* Validate the opcode sequence stored in [code, end).
   code   -- first word to check
   end    -- one past the last word to check
   groups -- bound used when checking MARK and GROUPREF* arguments
   Returns 1 if the whole range was consumed without a problem and 0
   otherwise (every FAIL below is a "return 0").  Sub-patterns delimited
   by skip counts are checked by recursive calls on the narrowed range. */
1687static int
1688_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1689{
1690 /* Some variables are manipulated by the macros above */
1691 SRE_CODE op;
1692 SRE_CODE arg;
1693 SRE_CODE skip;
1694
1695 VTRACE(("code=%p, end=%p\n", code, end));
1696
/* The recursive calls below pass end values such as code+skip-3; this
   check rejects a range whose computed end lies before its start. */
1697 if (code > end)
1698 FAIL;
1699
1700 while (code < end) {
1701 GET_OP;
1702 switch (op) {
1703
1704 case SRE_OP_MARK:
1705 /* We don't check whether marks are properly nested; the
1706 sre_match() code is robust even if they don't, and the worst
1707 you can get is nonsensical match results. */
1708 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001709 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001710 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1711 FAIL;
1712 }
1713 break;
1714
1715 case SRE_OP_LITERAL:
1716 case SRE_OP_NOT_LITERAL:
1717 case SRE_OP_LITERAL_IGNORE:
1718 case SRE_OP_NOT_LITERAL_IGNORE:
1719 GET_ARG;
1720 /* The arg is just a character, nothing to check */
1721 break;
1722
1723 case SRE_OP_SUCCESS:
1724 case SRE_OP_FAILURE:
1725 /* Nothing to check; these normally end the matching process */
1726 break;
1727
1728 case SRE_OP_AT:
1729 GET_ARG;
1730 switch (arg) {
1731 case SRE_AT_BEGINNING:
1732 case SRE_AT_BEGINNING_STRING:
1733 case SRE_AT_BEGINNING_LINE:
1734 case SRE_AT_END:
1735 case SRE_AT_END_LINE:
1736 case SRE_AT_END_STRING:
1737 case SRE_AT_BOUNDARY:
1738 case SRE_AT_NON_BOUNDARY:
1739 case SRE_AT_LOC_BOUNDARY:
1740 case SRE_AT_LOC_NON_BOUNDARY:
1741 case SRE_AT_UNI_BOUNDARY:
1742 case SRE_AT_UNI_NON_BOUNDARY:
1743 break;
1744 default:
1745 FAIL;
1746 }
1747 break;
1748
1749 case SRE_OP_ANY:
1750 case SRE_OP_ANY_ALL:
1751 /* These have no operands */
1752 break;
1753
1754 case SRE_OP_IN:
1755 case SRE_OP_IN_IGNORE:
1756 GET_SKIP;
1757 /* Stop 1 before the end; we check the FAILURE below */
1758 if (!_validate_charset(code, code+skip-2))
1759 FAIL;
1760 if (code[skip-2] != SRE_OP_FAILURE)
1761 FAIL;
1762 code += skip-1;
1763 break;
1764
1765 case SRE_OP_INFO:
1766 {
1767 /* A minimal info field is
1768 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1769 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1770 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001771 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001772 SRE_CODE *newcode;
1773 GET_SKIP;
/* newcode is where the INFO block ends according to its skip count;
   the checks below require the parsed contents to end exactly there. */
1774 newcode = code+skip-1;
1775 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001776 GET_ARG;
1777 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001778 /* Check that only valid flags are present */
1779 if ((flags & ~(SRE_INFO_PREFIX |
1780 SRE_INFO_LITERAL |
1781 SRE_INFO_CHARSET)) != 0)
1782 FAIL;
1783 /* PREFIX and CHARSET are mutually exclusive */
1784 if ((flags & SRE_INFO_PREFIX) &&
1785 (flags & SRE_INFO_CHARSET))
1786 FAIL;
1787 /* LITERAL implies PREFIX */
1788 if ((flags & SRE_INFO_LITERAL) &&
1789 !(flags & SRE_INFO_PREFIX))
1790 FAIL;
1791 /* Validate the prefix */
1792 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001793 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001794 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001795 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001796 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001797 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001798 FAIL;
1799 code += prefix_len;
1800 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001801 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001802 FAIL;
1803 /* Each overlap value should be < prefix_len */
1804 for (i = 0; i < prefix_len; i++) {
1805 if (code[i] >= prefix_len)
1806 FAIL;
1807 }
1808 code += prefix_len;
1809 }
1810 /* Validate the charset */
1811 if (flags & SRE_INFO_CHARSET) {
1812 if (!_validate_charset(code, newcode-1))
1813 FAIL;
1814 if (newcode[-1] != SRE_OP_FAILURE)
1815 FAIL;
1816 code = newcode;
1817 }
1818 else if (code != newcode) {
1819 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1820 FAIL;
1821 }
1822 }
1823 break;
1824
1825 case SRE_OP_BRANCH:
1826 {
/* target stays NULL until the first alternative's JUMP has been seen;
   every later JUMP must resolve to that same address. */
1827 SRE_CODE *target = NULL;
1828 for (;;) {
1829 GET_SKIP;
1830 if (skip == 0)
1831 break;
1832 /* Stop 2 before the end; we check the JUMP below */
1833 if (!_validate_inner(code, code+skip-3, groups))
1834 FAIL;
1835 code += skip-3;
1836 /* Check that it ends with a JUMP, and that each JUMP
1837 has the same target */
1838 GET_OP;
1839 if (op != SRE_OP_JUMP)
1840 FAIL;
1841 GET_SKIP;
1842 if (target == NULL)
1843 target = code+skip-1;
1844 else if (code+skip-1 != target)
1845 FAIL;
1846 }
1847 }
1848 break;
1849
1850 case SRE_OP_REPEAT_ONE:
1851 case SRE_OP_MIN_REPEAT_ONE:
1852 {
1853 SRE_CODE min, max;
1854 GET_SKIP;
1855 GET_ARG; min = arg;
1856 GET_ARG; max = arg;
1857 if (min > max)
1858 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001859 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001860 FAIL;
/* The repeated body must be followed by a SUCCESS opcode. */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001861 if (!_validate_inner(code, code+skip-4, groups))
1862 FAIL;
1863 code += skip-4;
1864 GET_OP;
1865 if (op != SRE_OP_SUCCESS)
1866 FAIL;
1867 }
1868 break;
1869
1870 case SRE_OP_REPEAT:
1871 {
1872 SRE_CODE min, max;
1873 GET_SKIP;
1874 GET_ARG; min = arg;
1875 GET_ARG; max = arg;
1876 if (min > max)
1877 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001878 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001879 FAIL;
/* The repeated body must be followed by MAX_UNTIL or MIN_UNTIL. */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001880 if (!_validate_inner(code, code+skip-3, groups))
1881 FAIL;
1882 code += skip-3;
1883 GET_OP;
1884 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1885 FAIL;
1886 }
1887 break;
1888
1889 case SRE_OP_GROUPREF:
1890 case SRE_OP_GROUPREF_IGNORE:
1891 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001892 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001893 FAIL;
1894 break;
1895
1896 case SRE_OP_GROUPREF_EXISTS:
1897 /* The regex syntax for this is: '(?(group)then|else)', where
1898 'group' is either an integer group number or a group name,
1899 'then' and 'else' are sub-regexes, and 'else' is optional. */
1900 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001901 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001902 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001903 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001904 code--; /* The skip is relative to the first arg! */
1905 /* There are two possibilities here: if there is both a 'then'
1906 part and an 'else' part, the generated code looks like:
1907
1908 GROUPREF_EXISTS
1909 <group>
1910 <skipyes>
1911 ...then part...
1912 JUMP
1913 <skipno>
1914 (<skipyes> jumps here)
1915 ...else part...
1916 (<skipno> jumps here)
1917
1918 If there is only a 'then' part, it looks like:
1919
1920 GROUPREF_EXISTS
1921 <group>
1922 <skip>
1923 ...then part...
1924 (<skip> jumps here)
1925
1926 There is no direct way to decide which it is, and we don't want
1927 to allow arbitrary jumps anywhere in the code; so we just look
1928 for a JUMP opcode preceding our skip target.
1929 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001930 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001931 code[skip-3] == SRE_OP_JUMP)
1932 {
1933 VTRACE(("both then and else parts present\n"));
1934 if (!_validate_inner(code+1, code+skip-3, groups))
1935 FAIL;
1936 code += skip-2; /* Position after JUMP, at <skipno> */
1937 GET_SKIP;
1938 if (!_validate_inner(code, code+skip-1, groups))
1939 FAIL;
1940 code += skip-1;
1941 }
1942 else {
1943 VTRACE(("only a then part present\n"));
1944 if (!_validate_inner(code+1, code+skip-1, groups))
1945 FAIL;
1946 code += skip-1;
1947 }
1948 break;
1949
1950 case SRE_OP_ASSERT:
1951 case SRE_OP_ASSERT_NOT:
1952 GET_SKIP;
1953 GET_ARG; /* 0 for lookahead, width for lookbehind */
1954 code--; /* Back up over arg to simplify math below */
1955 if (arg & 0x80000000)
1956 FAIL; /* Width too large */
1957 /* Stop 1 before the end; we check the SUCCESS below */
1958 if (!_validate_inner(code+1, code+skip-2, groups))
1959 FAIL;
1960 code += skip-2;
1961 GET_OP;
1962 if (op != SRE_OP_SUCCESS)
1963 FAIL;
1964 break;
1965
/* Any opcode not listed above is rejected. */
1966 default:
1967 FAIL;
1968
1969 }
1970 }
1971
1972 VTRACE(("okay\n"));
1973 return 1;
1974}
1975
1976static int
1977_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1978{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001979 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1980 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001981 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001982 return _validate_inner(code, end-1, groups);
1983}
1984
1985static int
1986_validate(PatternObject *self)
1987{
1988 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1989 {
1990 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1991 return 0;
1992 }
1993 else
1994 VTRACE(("Success!\n"));
1995 return 1;
1996}
1997
1998/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001999/* match methods */
2000
2001static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002002match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002003{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002004 Py_XDECREF(self->regs);
2005 Py_XDECREF(self->string);
2006 Py_DECREF(self->pattern);
2007 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002008}
2009
2010static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002011match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002012{
Serhiy Storchaka25324972013-10-16 12:46:28 +03002013 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002014 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03002015 Py_buffer view;
2016 PyObject *result;
2017 void* ptr;
2018
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002019 if (index < 0 || index >= self->groups) {
2020 /* raise IndexError if we were given a bad group number */
2021 PyErr_SetString(
2022 PyExc_IndexError,
2023 "no such group"
2024 );
2025 return NULL;
2026 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002027
Fredrik Lundh6f013982000-07-03 18:44:21 +00002028 index *= 2;
2029
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002030 if (self->string == Py_None || self->mark[index] < 0) {
2031 /* return default value if the string or group is undefined */
2032 Py_INCREF(def);
2033 return def;
2034 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002035
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002036 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03002037 if (ptr == NULL)
2038 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002039 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03002040 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002041 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002042 PyBuffer_Release(&view);
2043 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002044}
2045
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002046static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002047match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002048{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002049 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002050
Guido van Rossumddefaf32007-01-14 03:31:43 +00002051 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002052 /* Default value */
2053 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002054
Christian Heimes217cfd12007-12-02 14:31:20 +00002055 if (PyLong_Check(index))
2056 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002057
Fredrik Lundh6f013982000-07-03 18:44:21 +00002058 i = -1;
2059
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002060 if (self->pattern->groupindex) {
2061 index = PyObject_GetItem(self->pattern->groupindex, index);
2062 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002063 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002064 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002065 Py_DECREF(index);
2066 } else
2067 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002068 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002069
2070 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002071}
2072
2073static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002074match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002075{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002076 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002077}
2078
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002079/*[clinic input]
2080_sre.SRE_Match.expand
2081
2082 template: object
2083
2084Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2085[clinic start generated code]*/
2086
2087static PyObject *
2088_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2089/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002090{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002091 /* delegate to Python code */
2092 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002093 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002094 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002095 );
2096}
2097
2098static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002099match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002100{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002101 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002102 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002103
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002104 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002105
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002106 switch (size) {
2107 case 0:
2108 result = match_getslice(self, Py_False, Py_None);
2109 break;
2110 case 1:
2111 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2112 break;
2113 default:
2114 /* fetch multiple items */
2115 result = PyTuple_New(size);
2116 if (!result)
2117 return NULL;
2118 for (i = 0; i < size; i++) {
2119 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002120 self, PyTuple_GET_ITEM(args, i), Py_None
2121 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002122 if (!item) {
2123 Py_DECREF(result);
2124 return NULL;
2125 }
2126 PyTuple_SET_ITEM(result, i, item);
2127 }
2128 break;
2129 }
2130 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002131}
2132
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002133/*[clinic input]
2134_sre.SRE_Match.groups
2135
2136 default: object = None
2137 Is used for groups that did not participate in the match.
2138
2139Return a tuple containing all the subgroups of the match, from 1.
2140[clinic start generated code]*/
2141
2142static PyObject *
2143_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2144/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002145{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002146 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002147 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002148
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002149 result = PyTuple_New(self->groups-1);
2150 if (!result)
2151 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002152
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002153 for (index = 1; index < self->groups; index++) {
2154 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002155 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002156 if (!item) {
2157 Py_DECREF(result);
2158 return NULL;
2159 }
2160 PyTuple_SET_ITEM(result, index-1, item);
2161 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002162
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002163 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002164}
2165
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002166/*[clinic input]
2167_sre.SRE_Match.groupdict
2168
2169 default: object = None
2170 Is used for groups that did not participate in the match.
2171
2172Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2173[clinic start generated code]*/
2174
2175static PyObject *
2176_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2177/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002178{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002179 PyObject* result;
2180 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002181 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002182
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002183 result = PyDict_New();
2184 if (!result || !self->pattern->groupindex)
2185 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002186
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002187 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002188 if (!keys)
2189 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002190
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002191 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002192 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002193 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002194 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002195 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002196 if (!key)
2197 goto failed;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002198 value = match_getslice(self, key, default_value);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002199 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002200 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002201 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002202 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002203 status = PyDict_SetItem(result, key, value);
2204 Py_DECREF(value);
2205 if (status < 0)
2206 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002208
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002209 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002210
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002211 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002212
2213failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002214 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002215 Py_DECREF(result);
2216 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002217}
2218
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002219/*[clinic input]
2220_sre.SRE_Match.start -> Py_ssize_t
2221
2222 group: object(c_default="NULL") = 0
2223 /
2224
2225Return index of the start of the substring matched by group.
2226[clinic start generated code]*/
2227
2228static Py_ssize_t
2229_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2230/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002231{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002232 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002233
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002234 if (index < 0 || index >= self->groups) {
2235 PyErr_SetString(
2236 PyExc_IndexError,
2237 "no such group"
2238 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002239 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002240 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002241
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002242 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002243 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002244}
2245
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002246/*[clinic input]
2247_sre.SRE_Match.end -> Py_ssize_t
2248
2249 group: object(c_default="NULL") = 0
2250 /
2251
2252Return index of the end of the substring matched by group.
2253[clinic start generated code]*/
2254
2255static Py_ssize_t
2256_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2257/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002258{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002259 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002260
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002261 if (index < 0 || index >= self->groups) {
2262 PyErr_SetString(
2263 PyExc_IndexError,
2264 "no such group"
2265 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002266 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002267 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002268
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002269 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002270 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002271}
2272
2273LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002274_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002275{
2276 PyObject* pair;
2277 PyObject* item;
2278
2279 pair = PyTuple_New(2);
2280 if (!pair)
2281 return NULL;
2282
Christian Heimes217cfd12007-12-02 14:31:20 +00002283 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002284 if (!item)
2285 goto error;
2286 PyTuple_SET_ITEM(pair, 0, item);
2287
Christian Heimes217cfd12007-12-02 14:31:20 +00002288 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002289 if (!item)
2290 goto error;
2291 PyTuple_SET_ITEM(pair, 1, item);
2292
2293 return pair;
2294
2295 error:
2296 Py_DECREF(pair);
2297 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002298}
2299
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002300/*[clinic input]
2301_sre.SRE_Match.span
2302
2303 group: object(c_default="NULL") = 0
2304 /
2305
2306For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2307[clinic start generated code]*/
2308
2309static PyObject *
2310_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2311/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002312{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002313 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002314
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002315 if (index < 0 || index >= self->groups) {
2316 PyErr_SetString(
2317 PyExc_IndexError,
2318 "no such group"
2319 );
2320 return NULL;
2321 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002322
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002323 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002324 return _pair(self->mark[index*2], self->mark[index*2+1]);
2325}
2326
2327static PyObject*
2328match_regs(MatchObject* self)
2329{
2330 PyObject* regs;
2331 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002332 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002333
2334 regs = PyTuple_New(self->groups);
2335 if (!regs)
2336 return NULL;
2337
2338 for (index = 0; index < self->groups; index++) {
2339 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2340 if (!item) {
2341 Py_DECREF(regs);
2342 return NULL;
2343 }
2344 PyTuple_SET_ITEM(regs, index, item);
2345 }
2346
2347 Py_INCREF(regs);
2348 self->regs = regs;
2349
2350 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002351}
2352
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002353/*[clinic input]
2354_sre.SRE_Match.__copy__
2355
2356[clinic start generated code]*/
2357
2358static PyObject *
2359_sre_SRE_Match___copy___impl(MatchObject *self)
2360/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002361{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002362#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002363 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002364 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002365
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002366 slots = 2 * (self->pattern->groups+1);
2367
2368 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2369 if (!copy)
2370 return NULL;
2371
2372 /* this value a constant, but any compiler should be able to
2373 figure that out all by itself */
2374 offset = offsetof(MatchObject, string);
2375
2376 Py_XINCREF(self->pattern);
2377 Py_XINCREF(self->string);
2378 Py_XINCREF(self->regs);
2379
2380 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002381 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002382
2383 return (PyObject*) copy;
2384#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002385 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002386 return NULL;
2387#endif
2388}
2389
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002390/*[clinic input]
2391_sre.SRE_Match.__deepcopy__
2392
2393 memo: object
2394
2395[clinic start generated code]*/
2396
2397static PyObject *
2398_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2399/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002400{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002401#ifdef USE_BUILTIN_COPY
2402 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002403
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002404 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002405 if (!copy)
2406 return NULL;
2407
2408 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2409 !deepcopy(&copy->string, memo) ||
2410 !deepcopy(&copy->regs, memo)) {
2411 Py_DECREF(copy);
2412 return NULL;
2413 }
2414
2415#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002416 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2417 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002418#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002419}
2420
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002421PyDoc_STRVAR(match_doc,
2422"The result of re.match() and re.search().\n\
2423Match objects always have a boolean value of True.");
2424
2425PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002426"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002427 Return subgroup(s) of the match by indices or names.\n\
2428 For 0 returns the entire match.");
2429
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002430static PyObject *
2431match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002432{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002433 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002434 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002435 Py_INCREF(Py_None);
2436 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002437}
2438
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002439static PyObject *
2440match_lastgroup_get(MatchObject *self)
2441{
2442 if (self->pattern->indexgroup && self->lastindex >= 0) {
2443 PyObject* result = PySequence_GetItem(
2444 self->pattern->indexgroup, self->lastindex
2445 );
2446 if (result)
2447 return result;
2448 PyErr_Clear();
2449 }
2450 Py_INCREF(Py_None);
2451 return Py_None;
2452}
2453
2454static PyObject *
2455match_regs_get(MatchObject *self)
2456{
2457 if (self->regs) {
2458 Py_INCREF(self->regs);
2459 return self->regs;
2460 } else
2461 return match_regs(self);
2462}
2463
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002464static PyObject *
2465match_repr(MatchObject *self)
2466{
2467 PyObject *result;
2468 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2469 if (group0 == NULL)
2470 return NULL;
2471 result = PyUnicode_FromFormat(
2472 "<%s object; span=(%d, %d), match=%.50R>",
2473 Py_TYPE(self)->tp_name,
2474 self->mark[0], self->mark[1], group0);
2475 Py_DECREF(group0);
2476 return result;
2477}
2478
2479
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002480static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002481pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002482{
2483 /* create match object (from state object) */
2484
2485 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002486 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002487 char* base;
2488 int n;
2489
2490 if (status > 0) {
2491
2492 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002493 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002494 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2495 2*(pattern->groups+1));
2496 if (!match)
2497 return NULL;
2498
2499 Py_INCREF(pattern);
2500 match->pattern = pattern;
2501
2502 Py_INCREF(state->string);
2503 match->string = state->string;
2504
2505 match->regs = NULL;
2506 match->groups = pattern->groups+1;
2507
2508 /* fill in group slices */
2509
2510 base = (char*) state->beginning;
2511 n = state->charsize;
2512
2513 match->mark[0] = ((char*) state->start - base) / n;
2514 match->mark[1] = ((char*) state->ptr - base) / n;
2515
2516 for (i = j = 0; i < pattern->groups; i++, j+=2)
2517 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2518 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2519 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2520 } else
2521 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2522
2523 match->pos = state->pos;
2524 match->endpos = state->endpos;
2525
2526 match->lastindex = state->lastindex;
2527
2528 return (PyObject*) match;
2529
2530 } else if (status == 0) {
2531
2532 /* no match */
2533 Py_INCREF(Py_None);
2534 return Py_None;
2535
2536 }
2537
2538 /* internal error */
2539 pattern_error(status);
2540 return NULL;
2541}
2542
2543
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002544/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002545/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002546
2547static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002548scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002549{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002550 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002551 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002552 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002553}
2554
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002555/*[clinic input]
2556_sre.SRE_Scanner.match
2557
2558[clinic start generated code]*/
2559
2560static PyObject *
2561_sre_SRE_Scanner_match_impl(ScannerObject *self)
2562/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002563{
2564 SRE_STATE* state = &self->state;
2565 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002566 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002567
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002568 if (state->start == NULL)
2569 Py_RETURN_NONE;
2570
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002571 state_reset(state);
2572
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002573 state->ptr = state->start;
2574
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002575 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002576 if (PyErr_Occurred())
2577 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002578
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002579 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002580 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002581
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002582 if (status == 0)
2583 state->start = NULL;
2584 else if (state->ptr != state->start)
2585 state->start = state->ptr;
2586 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002587 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002588 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002589 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002590
2591 return match;
2592}
2593
2594
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002595/*[clinic input]
2596_sre.SRE_Scanner.search
2597
2598[clinic start generated code]*/
2599
2600static PyObject *
2601_sre_SRE_Scanner_search_impl(ScannerObject *self)
2602/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002603{
2604 SRE_STATE* state = &self->state;
2605 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002606 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002607
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002608 if (state->start == NULL)
2609 Py_RETURN_NONE;
2610
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002611 state_reset(state);
2612
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002613 state->ptr = state->start;
2614
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002615 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002616 if (PyErr_Occurred())
2617 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002618
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002619 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002620 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002621
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002622 if (status == 0)
2623 state->start = NULL;
2624 else if (state->ptr != state->start)
2625 state->start = state->ptr;
2626 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002627 state->start = (void*) ((char*) state->ptr + state->charsize);
2628 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002629 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002630
2631 return match;
2632}
2633
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002634static PyObject *
2635pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002636{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002637 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002638
2639 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002640 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2641 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002642 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002643 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002644
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002645 /* create search state object */
2646 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2647 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002648 return NULL;
2649 }
2650
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002651 Py_INCREF(self);
2652 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002653
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002654 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002655}
2656
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002657#include "clinic/_sre.c.h"
2658
2659static PyMethodDef pattern_methods[] = {
2660 _SRE_SRE_PATTERN_MATCH_METHODDEF
2661 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2662 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2663 _SRE_SRE_PATTERN_SUB_METHODDEF
2664 _SRE_SRE_PATTERN_SUBN_METHODDEF
2665 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2666 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2667 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2668 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2669 _SRE_SRE_PATTERN___COPY___METHODDEF
2670 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2671 {NULL, NULL}
2672};
2673
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002674static PyGetSetDef pattern_getset[] = {
2675 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2676 "A dictionary mapping group names to group numbers."},
2677 {NULL} /* Sentinel */
2678};
2679
2680#define PAT_OFF(x) offsetof(PatternObject, x)
2681static PyMemberDef pattern_members[] = {
2682 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2683 {"flags", T_INT, PAT_OFF(flags), READONLY},
2684 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2685 {NULL} /* Sentinel */
2686};
2687
2688static PyTypeObject Pattern_Type = {
2689 PyVarObject_HEAD_INIT(NULL, 0)
2690 "_" SRE_MODULE ".SRE_Pattern",
2691 sizeof(PatternObject), sizeof(SRE_CODE),
2692 (destructor)pattern_dealloc, /* tp_dealloc */
2693 0, /* tp_print */
2694 0, /* tp_getattr */
2695 0, /* tp_setattr */
2696 0, /* tp_reserved */
2697 (reprfunc)pattern_repr, /* tp_repr */
2698 0, /* tp_as_number */
2699 0, /* tp_as_sequence */
2700 0, /* tp_as_mapping */
2701 0, /* tp_hash */
2702 0, /* tp_call */
2703 0, /* tp_str */
2704 0, /* tp_getattro */
2705 0, /* tp_setattro */
2706 0, /* tp_as_buffer */
2707 Py_TPFLAGS_DEFAULT, /* tp_flags */
2708 pattern_doc, /* tp_doc */
2709 0, /* tp_traverse */
2710 0, /* tp_clear */
2711 0, /* tp_richcompare */
2712 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2713 0, /* tp_iter */
2714 0, /* tp_iternext */
2715 pattern_methods, /* tp_methods */
2716 pattern_members, /* tp_members */
2717 pattern_getset, /* tp_getset */
2718};
2719
2720
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002721static PyMethodDef match_methods[] = {
2722 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2723 _SRE_SRE_MATCH_START_METHODDEF
2724 _SRE_SRE_MATCH_END_METHODDEF
2725 _SRE_SRE_MATCH_SPAN_METHODDEF
2726 _SRE_SRE_MATCH_GROUPS_METHODDEF
2727 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2728 _SRE_SRE_MATCH_EXPAND_METHODDEF
2729 _SRE_SRE_MATCH___COPY___METHODDEF
2730 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2731 {NULL, NULL}
2732};
2733
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002734static PyGetSetDef match_getset[] = {
2735 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2736 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2737 {"regs", (getter)match_regs_get, (setter)NULL},
2738 {NULL}
2739};
2740
2741#define MATCH_OFF(x) offsetof(MatchObject, x)
2742static PyMemberDef match_members[] = {
2743 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2744 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2745 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2746 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2747 {NULL}
2748};
2749
2750/* FIXME: implement setattr("string", None) as a special case (to
2751 detach the associated string, if any */
2752
2753static PyTypeObject Match_Type = {
2754 PyVarObject_HEAD_INIT(NULL,0)
2755 "_" SRE_MODULE ".SRE_Match",
2756 sizeof(MatchObject), sizeof(Py_ssize_t),
2757 (destructor)match_dealloc, /* tp_dealloc */
2758 0, /* tp_print */
2759 0, /* tp_getattr */
2760 0, /* tp_setattr */
2761 0, /* tp_reserved */
2762 (reprfunc)match_repr, /* tp_repr */
2763 0, /* tp_as_number */
2764 0, /* tp_as_sequence */
2765 0, /* tp_as_mapping */
2766 0, /* tp_hash */
2767 0, /* tp_call */
2768 0, /* tp_str */
2769 0, /* tp_getattro */
2770 0, /* tp_setattro */
2771 0, /* tp_as_buffer */
2772 Py_TPFLAGS_DEFAULT, /* tp_flags */
2773 match_doc, /* tp_doc */
2774 0, /* tp_traverse */
2775 0, /* tp_clear */
2776 0, /* tp_richcompare */
2777 0, /* tp_weaklistoffset */
2778 0, /* tp_iter */
2779 0, /* tp_iternext */
2780 match_methods, /* tp_methods */
2781 match_members, /* tp_members */
2782 match_getset, /* tp_getset */
2783};
2784
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002785static PyMethodDef scanner_methods[] = {
2786 _SRE_SRE_SCANNER_MATCH_METHODDEF
2787 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2788 {NULL, NULL}
2789};
2790
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002791#define SCAN_OFF(x) offsetof(ScannerObject, x)
2792static PyMemberDef scanner_members[] = {
2793 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2794 {NULL} /* Sentinel */
2795};
2796
2797static PyTypeObject Scanner_Type = {
2798 PyVarObject_HEAD_INIT(NULL, 0)
2799 "_" SRE_MODULE ".SRE_Scanner",
2800 sizeof(ScannerObject), 0,
2801 (destructor)scanner_dealloc,/* tp_dealloc */
2802 0, /* tp_print */
2803 0, /* tp_getattr */
2804 0, /* tp_setattr */
2805 0, /* tp_reserved */
2806 0, /* tp_repr */
2807 0, /* tp_as_number */
2808 0, /* tp_as_sequence */
2809 0, /* tp_as_mapping */
2810 0, /* tp_hash */
2811 0, /* tp_call */
2812 0, /* tp_str */
2813 0, /* tp_getattro */
2814 0, /* tp_setattro */
2815 0, /* tp_as_buffer */
2816 Py_TPFLAGS_DEFAULT, /* tp_flags */
2817 0, /* tp_doc */
2818 0, /* tp_traverse */
2819 0, /* tp_clear */
2820 0, /* tp_richcompare */
2821 0, /* tp_weaklistoffset */
2822 0, /* tp_iter */
2823 0, /* tp_iternext */
2824 scanner_methods, /* tp_methods */
2825 scanner_members, /* tp_members */
2826 0, /* tp_getset */
2827};
2828
Guido van Rossumb700df92000-03-31 14:59:30 +00002829static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002830 _SRE_COMPILE_METHODDEF
2831 _SRE_GETCODESIZE_METHODDEF
2832 _SRE_GETLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002833 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002834};
2835
Martin v. Löwis1a214512008-06-11 05:26:20 +00002836static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002837 PyModuleDef_HEAD_INIT,
2838 "_" SRE_MODULE,
2839 NULL,
2840 -1,
2841 _functions,
2842 NULL,
2843 NULL,
2844 NULL,
2845 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002846};
2847
2848PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002849{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002850 PyObject* m;
2851 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002852 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002853
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002854 /* Patch object types */
2855 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2856 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002857 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002858
Martin v. Löwis1a214512008-06-11 05:26:20 +00002859 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002860 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002861 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002862 d = PyModule_GetDict(m);
2863
Christian Heimes217cfd12007-12-02 14:31:20 +00002864 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002865 if (x) {
2866 PyDict_SetItemString(d, "MAGIC", x);
2867 Py_DECREF(x);
2868 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002869
Christian Heimes217cfd12007-12-02 14:31:20 +00002870 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002871 if (x) {
2872 PyDict_SetItemString(d, "CODESIZE", x);
2873 Py_DECREF(x);
2874 }
2875
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002876 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2877 if (x) {
2878 PyDict_SetItemString(d, "MAXREPEAT", x);
2879 Py_DECREF(x);
2880 }
2881
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002882 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2883 if (x) {
2884 PyDict_SetItemString(d, "MAXGROUPS", x);
2885 Py_DECREF(x);
2886 }
2887
Neal Norwitzfe537132007-08-26 03:55:15 +00002888 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002889 if (x) {
2890 PyDict_SetItemString(d, "copyright", x);
2891 Py_DECREF(x);
2892 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002893 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002894}
2895
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002896/* vim:ts=4:sw=4:et
2897*/