blob: 4016a4533e0bccf6a7658bbccad20d8a5c3a958f [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000038static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000039 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
65/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000066#define USE_FAST_SEARCH
67
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000068/* enables copy/deepcopy handling (work in progress) */
69#undef USE_BUILTIN_COPY
70
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000071/* -------------------------------------------------------------------- */
72
Fredrik Lundh80946112000-06-29 18:03:25 +000073#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000074#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000075#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000076/* fastest possible local call under MSVC */
77#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000078#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000079#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000080#else
81#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000082#endif
83
84/* error codes */
85#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000086#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000087#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000088#define SRE_ERROR_MEMORY -9 /* out of memory */
Christian Heimes2380ac72008-01-09 00:17:24 +000089#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000090
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000091#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000092#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000093#else
94#define TRACE(v)
95#endif
96
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000097/* -------------------------------------------------------------------- */
98/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000099
/* ASCII-only character predicates.  Each test is false for any code
   point of 128 or above; below that it defers to the Py_IS* macros. */
#define SRE_IS_DIGIT(ch) ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) < 128 && Py_ISSPACE(ch))
/* only '\n' is treated as a line break by the non-unicode predicate */
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_ALNUM(ch) ((ch) < 128 && Py_ISALNUM(ch))
/* a "word" character is an ASCII alphanumeric or the underscore */
#define SRE_IS_WORD(ch) ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +0000110
/* Lowercase an ASCII code point.  Values of 128 and above are passed
   through untouched. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
115
/* Uppercase an ASCII code point.  Values of 128 and above are passed
   through untouched. */
static unsigned int sre_upper(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOUPPER(ch);
}
120
/* locale-specific character predicates */
/* For any unsigned c, !(c & ~N) is equivalent to (c < N+1).  The mask
 * form is used because it avoids compiler warnings when c's type can
 * only hold numbers < N+1. */
#define SRE_LOC_IS_ALNUM(ch) \
    (((ch) & ~255) ? 0 : isalnum((ch)))
/* locale alphanumeric, or the underscore */
#define SRE_LOC_IS_WORD(ch) \
    (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
126
/* Lowercase a code point with the C library's tolower(), i.e. according
   to the current locale.  Only values below 256 are converted; anything
   larger is returned as-is. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower((int)ch);
}
131
/* Uppercase a code point with the C library's toupper(), i.e. according
   to the current locale.  Only values below 256 are converted; anything
   larger is returned as-is. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper((int)ch);
}
136
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000137/* unicode-specific character predicates */
138
/* Thin aliases over the Py_UNICODE_* classification macros. */
#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
/* unicode alphanumeric, or the underscore */
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000144
/* Lowercase a code point via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
149
/* Uppercase a code point via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
154
Guido van Rossumb700df92000-03-31 14:59:30 +0000155LOCAL(int)
156sre_category(SRE_CODE category, unsigned int ch)
157{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000158 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000160 case SRE_CATEGORY_DIGIT:
161 return SRE_IS_DIGIT(ch);
162 case SRE_CATEGORY_NOT_DIGIT:
163 return !SRE_IS_DIGIT(ch);
164 case SRE_CATEGORY_SPACE:
165 return SRE_IS_SPACE(ch);
166 case SRE_CATEGORY_NOT_SPACE:
167 return !SRE_IS_SPACE(ch);
168 case SRE_CATEGORY_WORD:
169 return SRE_IS_WORD(ch);
170 case SRE_CATEGORY_NOT_WORD:
171 return !SRE_IS_WORD(ch);
172 case SRE_CATEGORY_LINEBREAK:
173 return SRE_IS_LINEBREAK(ch);
174 case SRE_CATEGORY_NOT_LINEBREAK:
175 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000176
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000177 case SRE_CATEGORY_LOC_WORD:
178 return SRE_LOC_IS_WORD(ch);
179 case SRE_CATEGORY_LOC_NOT_WORD:
180 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000182 case SRE_CATEGORY_UNI_DIGIT:
183 return SRE_UNI_IS_DIGIT(ch);
184 case SRE_CATEGORY_UNI_NOT_DIGIT:
185 return !SRE_UNI_IS_DIGIT(ch);
186 case SRE_CATEGORY_UNI_SPACE:
187 return SRE_UNI_IS_SPACE(ch);
188 case SRE_CATEGORY_UNI_NOT_SPACE:
189 return !SRE_UNI_IS_SPACE(ch);
190 case SRE_CATEGORY_UNI_WORD:
191 return SRE_UNI_IS_WORD(ch);
192 case SRE_CATEGORY_UNI_NOT_WORD:
193 return !SRE_UNI_IS_WORD(ch);
194 case SRE_CATEGORY_UNI_LINEBREAK:
195 return SRE_UNI_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
197 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000198 }
199 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000200}
201
202/* helpers */
203
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000204static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000205data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000206{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000207 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000208 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000209 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000210 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000211 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000212}
213
214static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000215data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000217 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000218 minsize = state->data_stack_base+size;
219 cursize = state->data_stack_size;
220 if (cursize < minsize) {
221 void* stack;
222 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300223 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000224 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000226 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000227 return SRE_ERROR_MEMORY;
228 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000229 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000230 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000231 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000232 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000233}
234
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000235/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000236
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300237#define SRE_CHAR Py_UCS1
238#define SIZEOF_SRE_CHAR 1
239#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300240#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000241
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300242/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000243
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300244#define SRE_CHAR Py_UCS2
245#define SIZEOF_SRE_CHAR 2
246#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300247#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000248
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300249/* generate 32-bit unicode version */
250
251#define SRE_CHAR Py_UCS4
252#define SIZEOF_SRE_CHAR 4
253#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300254#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000255
256/* -------------------------------------------------------------------- */
257/* factories and destructors */
258
259/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100260static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300261static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000262
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300263
264/*[clinic input]
265module _sre
266class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
267class _sre.SRE_Match "MatchObject *" "&Match_Type"
268class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
269[clinic start generated code]*/
270/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
271
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700272static PyTypeObject Pattern_Type;
273static PyTypeObject Match_Type;
274static PyTypeObject Scanner_Type;
275
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300276/*[clinic input]
277_sre.getcodesize -> int
278[clinic start generated code]*/
279
280static int
281_sre_getcodesize_impl(PyModuleDef *module)
282/*[clinic end generated code: output=794f1f98ef4883e5 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000283{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300284 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000285}
286
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300287/*[clinic input]
288_sre.getlower -> int
289
290 character: int
291 flags: int
292 /
293
294[clinic start generated code]*/
295
296static int
297_sre_getlower_impl(PyModuleDef *module, int character, int flags)
298/*[clinic end generated code: output=5fc3616ae2a4c306 input=087d2f1c44bbca6f]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000299{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000300 if (flags & SRE_FLAG_LOCALE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300301 return sre_lower_locale(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000302 if (flags & SRE_FLAG_UNICODE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300303 return sre_lower_unicode(character);
304 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000305}
306
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307LOCAL(void)
308state_reset(SRE_STATE* state)
309{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000310 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000311 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000312
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000313 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000314 state->lastindex = -1;
315
316 state->repeat = NULL;
317
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000318 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000319}
320
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000321static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300323 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600324 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000325{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000326 /* given a python object, return a data pointer, a length (in
327 characters), and a character size. return NULL if the object
328 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000329
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000330 /* Unicode objects do not support the buffer API. So, get the data
331 directly instead. */
332 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333 if (PyUnicode_READY(string) == -1)
334 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200336 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300337 *p_isbytes = 0;
338 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000339 }
340
Victor Stinner0058b862011-09-29 03:27:47 +0200341 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300342 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200343 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000345 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000346
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300347 *p_length = view->len;
348 *p_charsize = 1;
349 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000350
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300351 if (view->buf == NULL) {
352 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
353 PyBuffer_Release(view);
354 view->buf = NULL;
355 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000356 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300357 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000358}
359
360LOCAL(PyObject*)
361state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000362 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000363{
364 /* prepare state object */
365
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000366 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300367 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000368 void* ptr;
369
370 memset(state, 0, sizeof(SRE_STATE));
371
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300372 state->mark = PyMem_New(void *, pattern->groups * 2);
373 if (!state->mark) {
374 PyErr_NoMemory();
375 goto err;
376 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000377 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000378 state->lastindex = -1;
379
Benjamin Petersone48944b2012-03-07 14:50:25 -0600380 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300381 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000382 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600383 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000384
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300385 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600386 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200387 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600388 goto err;
389 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300390 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600391 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200392 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600393 goto err;
394 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000395
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000396 /* adjust boundaries */
397 if (start < 0)
398 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000399 else if (start > length)
400 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000401
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000402 if (end < 0)
403 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000404 else if (end > length)
405 end = length;
406
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300407 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000408 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000409
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000410 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000411
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000412 state->start = (void*) ((char*) ptr + start * state->charsize);
413 state->end = (void*) ((char*) ptr + end * state->charsize);
414
415 Py_INCREF(string);
416 state->string = string;
417 state->pos = start;
418 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000419
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200420 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000421 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200422 state->upper = sre_upper_locale;
423 }
424 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000425 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200426 state->upper = sre_upper_unicode;
427 }
428 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000429 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200430 state->upper = sre_upper;
431 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000432
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000433 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600434 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300435 PyMem_Del(state->mark);
436 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600437 if (state->buffer.buf)
438 PyBuffer_Release(&state->buffer);
439 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000440}
441
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000442LOCAL(void)
443state_fini(SRE_STATE* state)
444{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600445 if (state->buffer.buf)
446 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000447 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000448 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300449 PyMem_Del(state->mark);
450 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000451}
452
/* Convert a pointer into the subject data (`member`) to a character
   index: the byte distance from state->beginning divided by the
   state's character size. */
#define STATE_OFFSET(state, member) \
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
456
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000457LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300458getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300459 PyObject* string, Py_ssize_t start, Py_ssize_t end)
460{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300461 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300462 if (PyBytes_CheckExact(string) &&
463 start == 0 && end == PyBytes_GET_SIZE(string)) {
464 Py_INCREF(string);
465 return string;
466 }
467 return PyBytes_FromStringAndSize(
468 (const char *)ptr + start, end - start);
469 }
470 else {
471 return PyUnicode_Substring(string, start, end);
472 }
473}
474
475LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000476state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000477{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000478 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000479
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000480 index = (index - 1) * 2;
481
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000482 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000483 if (empty)
484 /* want empty string */
485 i = j = 0;
486 else {
487 Py_INCREF(Py_None);
488 return Py_None;
489 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000490 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000491 i = STATE_OFFSET(state, state->mark[index]);
492 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000493 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000494
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300495 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000496}
497
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000498static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100499pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000500{
501 switch (status) {
502 case SRE_ERROR_RECURSION_LIMIT:
503 PyErr_SetString(
504 PyExc_RuntimeError,
505 "maximum recursion limit exceeded"
506 );
507 break;
508 case SRE_ERROR_MEMORY:
509 PyErr_NoMemory();
510 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000511 case SRE_ERROR_INTERRUPTED:
512 /* An exception has already been raised, so let it fly */
513 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000514 default:
515 /* other error codes indicate compiler/engine bugs */
516 PyErr_SetString(
517 PyExc_RuntimeError,
518 "internal error in regular expression engine"
519 );
520 }
521}
522
Guido van Rossumb700df92000-03-31 14:59:30 +0000523static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000524pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000525{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000526 if (self->weakreflist != NULL)
527 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000528 Py_XDECREF(self->pattern);
529 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000530 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000531 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000532}
533
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300534LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300535sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300536{
537 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300538 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300539 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300540 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300541 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300542 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300543}
544
545LOCAL(Py_ssize_t)
546sre_search(SRE_STATE* state, SRE_CODE* pattern)
547{
548 if (state->charsize == 1)
549 return sre_ucs1_search(state, pattern);
550 if (state->charsize == 2)
551 return sre_ucs2_search(state, pattern);
552 assert(state->charsize == 4);
553 return sre_ucs4_search(state, pattern);
554}
555
Larry Hastings16c51912014-01-07 11:53:01 -0800556static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200557fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
558{
559 if (string2 != NULL) {
560 if (string != NULL) {
561 PyErr_Format(PyExc_TypeError,
562 "Argument given by name ('%s') and position (1)",
563 oldname);
564 return NULL;
565 }
566 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
567 "The '%s' keyword parameter name is deprecated. "
568 "Use 'string' instead.", oldname) < 0)
569 return NULL;
570 return string2;
571 }
572 if (string == NULL) {
573 PyErr_SetString(PyExc_TypeError,
574 "Required argument 'string' (pos 1) not found");
575 return NULL;
576 }
577 return string;
578}
Larry Hastings16c51912014-01-07 11:53:01 -0800579
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300580/*[clinic input]
581_sre.SRE_Pattern.match
582
583 string: object = NULL
584 pos: Py_ssize_t = 0
585 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
586 *
587 pattern: object = NULL
588
589Matches zero or more characters at the beginning of the string.
590[clinic start generated code]*/
591
Larry Hastings16c51912014-01-07 11:53:01 -0800592static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300593_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
594 Py_ssize_t pos, Py_ssize_t endpos,
595 PyObject *pattern)
596/*[clinic end generated code: output=74b4b1da3bb2d84e input=3d079aa99979b81d]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800597{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000598 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100599 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300600 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000601
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200602 string = fix_string_param(string, pattern, "pattern");
603 if (!string)
604 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300605 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000606 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000607
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000608 state.ptr = state.start;
609
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000610 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
611
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300612 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000613
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000614 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300615 if (PyErr_Occurred()) {
616 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000617 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300618 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000619
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300620 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000621 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300622 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000623}
624
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300625/*[clinic input]
626_sre.SRE_Pattern.fullmatch
627
628 string: object = NULL
629 pos: Py_ssize_t = 0
630 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
631 *
632 pattern: object = NULL
633
634Matches against all of the string
635[clinic start generated code]*/
636
637static PyObject *
638_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
639 Py_ssize_t pos, Py_ssize_t endpos,
640 PyObject *pattern)
641/*[clinic end generated code: output=1c98bc5da744ea94 input=d4228606cc12580f]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200642{
643 SRE_STATE state;
644 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300645 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200646
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300647 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200648 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200649 return NULL;
650
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300651 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200652 return NULL;
653
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200654 state.ptr = state.start;
655
656 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
657
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300658 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200659
660 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300661 if (PyErr_Occurred()) {
662 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200663 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300664 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200665
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300666 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200667 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300668 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200669}
670
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300671/*[clinic input]
672_sre.SRE_Pattern.search
673
674 string: object = NULL
675 pos: Py_ssize_t = 0
676 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
677 *
678 pattern: object = NULL
679
680Scan through string looking for a match, and return a corresponding match object instance.
681
682Return None if no position in the string matches.
683[clinic start generated code]*/
684
685static PyObject *
686_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
687 Py_ssize_t pos, Py_ssize_t endpos,
688 PyObject *pattern)
689/*[clinic end generated code: output=3839394a18e5ea4f input=dab42720f4be3a4b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000690{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000691 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100692 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300693 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000694
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300695 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200696 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000697 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000698
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300699 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000700 return NULL;
701
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000702 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
703
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300704 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000705
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000706 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
707
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300708 if (PyErr_Occurred()) {
709 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000710 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300711 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000712
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300713 match = pattern_new_match(self, &state, status);
714 state_fini(&state);
715 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000716}
717
718static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000719call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000720{
721 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000722 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000723 PyObject* func;
724 PyObject* result;
725
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000726 if (!args)
727 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000728 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000729 if (!name)
730 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000731 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000732 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000733 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000734 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000735 func = PyObject_GetAttrString(mod, function);
736 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000737 if (!func)
738 return NULL;
739 result = PyObject_CallObject(func, args);
740 Py_DECREF(func);
741 Py_DECREF(args);
742 return result;
743}
744
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000745#ifdef USE_BUILTIN_COPY
746static int
747deepcopy(PyObject** object, PyObject* memo)
748{
749 PyObject* copy;
750
751 copy = call(
752 "copy", "deepcopy",
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000753 PyTuple_Pack(2, *object, memo)
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000754 );
755 if (!copy)
756 return 0;
757
758 Py_DECREF(*object);
759 *object = copy;
760
761 return 1; /* success */
762}
763#endif
764
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300765/*[clinic input]
766_sre.SRE_Pattern.findall
767
768 string: object = NULL
769 pos: Py_ssize_t = 0
770 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
771 *
772 source: object = NULL
773
774Return a list of all non-overlapping matches of pattern in string.
775[clinic start generated code]*/
776
777static PyObject *
778_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
779 Py_ssize_t pos, Py_ssize_t endpos,
780 PyObject *source)
781/*[clinic end generated code: output=51295498b300639d input=df688355c056b9de]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000782{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000783 SRE_STATE state;
784 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100785 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000786 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000787
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300788 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200789 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000790 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000791
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300792 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000793 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000794
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000795 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000796 if (!list) {
797 state_fini(&state);
798 return NULL;
799 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000800
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000801 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000802
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000803 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000804
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000805 state_reset(&state);
806
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000807 state.ptr = state.start;
808
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300809 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300810 if (PyErr_Occurred())
811 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000812
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000813 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000814 if (status == 0)
815 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000816 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000817 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000818 }
Tim Peters3d563502006-01-21 02:47:53 +0000819
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000820 /* don't bother to build a match object */
821 switch (self->groups) {
822 case 0:
823 b = STATE_OFFSET(&state, state.start);
824 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300825 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300826 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000827 if (!item)
828 goto error;
829 break;
830 case 1:
831 item = state_getslice(&state, 1, string, 1);
832 if (!item)
833 goto error;
834 break;
835 default:
836 item = PyTuple_New(self->groups);
837 if (!item)
838 goto error;
839 for (i = 0; i < self->groups; i++) {
840 PyObject* o = state_getslice(&state, i+1, string, 1);
841 if (!o) {
842 Py_DECREF(item);
843 goto error;
844 }
845 PyTuple_SET_ITEM(item, i, o);
846 }
847 break;
848 }
849
850 status = PyList_Append(list, item);
851 Py_DECREF(item);
852 if (status < 0)
853 goto error;
854
855 if (state.ptr == state.start)
856 state.start = (void*) ((char*) state.ptr + state.charsize);
857 else
858 state.start = state.ptr;
859
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000860 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000861
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000862 state_fini(&state);
863 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000864
865error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000866 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000867 state_fini(&state);
868 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000869
Guido van Rossumb700df92000-03-31 14:59:30 +0000870}
871
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300872/*[clinic input]
873_sre.SRE_Pattern.finditer
874
875 string: object
876 pos: Py_ssize_t = 0
877 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
878
879Return an iterator over all non-overlapping matches for the RE pattern in string.
880
881For each match, the iterator returns a match object.
882[clinic start generated code]*/
883
884static PyObject *
885_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
886 Py_ssize_t pos, Py_ssize_t endpos)
887/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000888{
889 PyObject* scanner;
890 PyObject* search;
891 PyObject* iterator;
892
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300893 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000894 if (!scanner)
895 return NULL;
896
897 search = PyObject_GetAttrString(scanner, "search");
898 Py_DECREF(scanner);
899 if (!search)
900 return NULL;
901
902 iterator = PyCallIter_New(search, Py_None);
903 Py_DECREF(search);
904
905 return iterator;
906}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000907
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300908/*[clinic input]
909_sre.SRE_Pattern.scanner
910
911 string: object
912 pos: Py_ssize_t = 0
913 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
914
915[clinic start generated code]*/
916
917static PyObject *
918_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
919 Py_ssize_t pos, Py_ssize_t endpos)
920/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
921{
922 return pattern_scanner(self, string, pos, endpos);
923}
924
925/*[clinic input]
926_sre.SRE_Pattern.split
927
928 string: object = NULL
929 maxsplit: Py_ssize_t = 0
930 *
931 source: object = NULL
932
933Split string by the occurrences of pattern.
934[clinic start generated code]*/
935
936static PyObject *
937_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
938 Py_ssize_t maxsplit, PyObject *source)
939/*[clinic end generated code: output=20bac2ff55b9f84c input=41e0b2e35e599d7b]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000940{
941 SRE_STATE state;
942 PyObject* list;
943 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100944 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000945 Py_ssize_t n;
946 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000947 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000948
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300949 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200950 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000951 return NULL;
952
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200953 assert(self->codesize != 0);
954 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
955 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
956 PyErr_SetString(PyExc_ValueError,
957 "split() requires a non-empty pattern match.");
958 return NULL;
959 }
960 if (PyErr_WarnEx(PyExc_FutureWarning,
961 "split() requires a non-empty pattern match.",
962 1) < 0)
963 return NULL;
964 }
965
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300966 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000967 return NULL;
968
969 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000970 if (!list) {
971 state_fini(&state);
972 return NULL;
973 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000974
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000975 n = 0;
976 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000977
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000978 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000979
980 state_reset(&state);
981
982 state.ptr = state.start;
983
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300984 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300985 if (PyErr_Occurred())
986 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000987
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000988 if (status <= 0) {
989 if (status == 0)
990 break;
991 pattern_error(status);
992 goto error;
993 }
Tim Peters3d563502006-01-21 02:47:53 +0000994
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000995 if (state.start == state.ptr) {
996 if (last == state.end)
997 break;
998 /* skip one character */
999 state.start = (void*) ((char*) state.ptr + state.charsize);
1000 continue;
1001 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001002
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001003 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001004 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001005 string, STATE_OFFSET(&state, last),
1006 STATE_OFFSET(&state, state.start)
1007 );
1008 if (!item)
1009 goto error;
1010 status = PyList_Append(list, item);
1011 Py_DECREF(item);
1012 if (status < 0)
1013 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001014
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001015 /* add groups (if any) */
1016 for (i = 0; i < self->groups; i++) {
1017 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001018 if (!item)
1019 goto error;
1020 status = PyList_Append(list, item);
1021 Py_DECREF(item);
1022 if (status < 0)
1023 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001024 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001025
1026 n = n + 1;
1027
1028 last = state.start = state.ptr;
1029
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001030 }
1031
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001032 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001033 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001034 string, STATE_OFFSET(&state, last), state.endpos
1035 );
1036 if (!item)
1037 goto error;
1038 status = PyList_Append(list, item);
1039 Py_DECREF(item);
1040 if (status < 0)
1041 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001042
1043 state_fini(&state);
1044 return list;
1045
1046error:
1047 Py_DECREF(list);
1048 state_fini(&state);
1049 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001050
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001051}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001052
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001053static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001054pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001055 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001056{
1057 SRE_STATE state;
1058 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001059 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001060 PyObject* item;
1061 PyObject* filter;
1062 PyObject* args;
1063 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001064 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001065 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001066 Py_ssize_t n;
1067 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001068 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001069 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001070 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001071
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001072 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001073 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001074 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001075 Py_INCREF(filter);
1076 filter_is_callable = 1;
1077 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001078 /* if not callable, check if it's a literal string */
1079 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001080 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001081 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001082 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001083 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001084 if (charsize == 1)
1085 literal = memchr(ptr, '\\', n) == NULL;
1086 else
1087 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001088 } else {
1089 PyErr_Clear();
1090 literal = 0;
1091 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001092 if (view.buf)
1093 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001094 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001095 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001096 Py_INCREF(filter);
1097 filter_is_callable = 0;
1098 } else {
1099 /* not a literal; hand it over to the template compiler */
1100 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001101 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001102 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001103 );
1104 if (!filter)
1105 return NULL;
1106 filter_is_callable = PyCallable_Check(filter);
1107 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001108 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001109
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001110 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001111 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001112 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001113 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001114
1115 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001116 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001117 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001118 state_fini(&state);
1119 return NULL;
1120 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001121
1122 n = i = 0;
1123
1124 while (!count || n < count) {
1125
1126 state_reset(&state);
1127
1128 state.ptr = state.start;
1129
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001130 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001131 if (PyErr_Occurred())
1132 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001133
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001134 if (status <= 0) {
1135 if (status == 0)
1136 break;
1137 pattern_error(status);
1138 goto error;
1139 }
Tim Peters3d563502006-01-21 02:47:53 +00001140
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001141 b = STATE_OFFSET(&state, state.start);
1142 e = STATE_OFFSET(&state, state.ptr);
1143
1144 if (i < b) {
1145 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001146 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001147 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001148 if (!item)
1149 goto error;
1150 status = PyList_Append(list, item);
1151 Py_DECREF(item);
1152 if (status < 0)
1153 goto error;
1154
1155 } else if (i == b && i == e && n > 0)
1156 /* ignore empty match on latest position */
1157 goto next;
1158
1159 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001160 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001161 match = pattern_new_match(self, &state, 1);
1162 if (!match)
1163 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001164 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001165 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001166 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001167 goto error;
1168 }
1169 item = PyObject_CallObject(filter, args);
1170 Py_DECREF(args);
1171 Py_DECREF(match);
1172 if (!item)
1173 goto error;
1174 } else {
1175 /* filter is literal string */
1176 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001177 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001178 }
1179
1180 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001181 if (item != Py_None) {
1182 status = PyList_Append(list, item);
1183 Py_DECREF(item);
1184 if (status < 0)
1185 goto error;
1186 }
Tim Peters3d563502006-01-21 02:47:53 +00001187
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001188 i = e;
1189 n = n + 1;
1190
1191next:
1192 /* move on */
1193 if (state.ptr == state.start)
1194 state.start = (void*) ((char*) state.ptr + state.charsize);
1195 else
1196 state.start = state.ptr;
1197
1198 }
1199
1200 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001201 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001202 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001203 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001204 if (!item)
1205 goto error;
1206 status = PyList_Append(list, item);
1207 Py_DECREF(item);
1208 if (status < 0)
1209 goto error;
1210 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001211
1212 state_fini(&state);
1213
Guido van Rossum4e173842001-12-07 04:25:10 +00001214 Py_DECREF(filter);
1215
Fredrik Lundhdac58492001-10-21 21:48:30 +00001216 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001217 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001218 if (!joiner) {
1219 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001220 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001221 }
1222 if (PyList_GET_SIZE(list) == 0) {
1223 Py_DECREF(list);
1224 item = joiner;
1225 }
1226 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001227 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001228 item = _PyBytes_Join(joiner, list);
1229 else
1230 item = PyUnicode_Join(joiner, list);
1231 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001232 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001233 if (!item)
1234 return NULL;
1235 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001236
1237 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001238 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001239
1240 return item;
1241
1242error:
1243 Py_DECREF(list);
1244 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001245 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001246 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001247
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001248}
1249
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001250/*[clinic input]
1251_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001252
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001253 repl: object
1254 string: object
1255 count: Py_ssize_t = 0
1256
1257Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1258[clinic start generated code]*/
1259
1260static PyObject *
1261_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1262 PyObject *string, Py_ssize_t count)
1263/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1264{
1265 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001266}
1267
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001268/*[clinic input]
1269_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001270
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001271 repl: object
1272 string: object
1273 count: Py_ssize_t = 0
1274
1275Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1276[clinic start generated code]*/
1277
1278static PyObject *
1279_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1280 PyObject *string, Py_ssize_t count)
1281/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1282{
1283 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001284}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001285
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001286/*[clinic input]
1287_sre.SRE_Pattern.__copy__
1288
1289[clinic start generated code]*/
1290
1291static PyObject *
1292_sre_SRE_Pattern___copy___impl(PatternObject *self)
1293/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001294{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001295#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001296 PatternObject* copy;
1297 int offset;
1298
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001299 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1300 if (!copy)
1301 return NULL;
1302
1303 offset = offsetof(PatternObject, groups);
1304
1305 Py_XINCREF(self->groupindex);
1306 Py_XINCREF(self->indexgroup);
1307 Py_XINCREF(self->pattern);
1308
1309 memcpy((char*) copy + offset, (char*) self + offset,
1310 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001311 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001312
1313 return (PyObject*) copy;
1314#else
1315 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1316 return NULL;
1317#endif
1318}
1319
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001320/*[clinic input]
1321_sre.SRE_Pattern.__deepcopy__
1322
1323 memo: object
1324
1325[clinic start generated code]*/
1326
1327static PyObject *
1328_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1329/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001330{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001331#ifdef USE_BUILTIN_COPY
1332 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001333
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001334 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001335 if (!copy)
1336 return NULL;
1337
1338 if (!deepcopy(&copy->groupindex, memo) ||
1339 !deepcopy(&copy->indexgroup, memo) ||
1340 !deepcopy(&copy->pattern, memo)) {
1341 Py_DECREF(copy);
1342 return NULL;
1343 }
1344
1345#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001346 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1347 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001348#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001349}
1350
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001351static PyObject *
1352pattern_repr(PatternObject *obj)
1353{
1354 static const struct {
1355 const char *name;
1356 int value;
1357 } flag_names[] = {
1358 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1359 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1360 {"re.LOCALE", SRE_FLAG_LOCALE},
1361 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1362 {"re.DOTALL", SRE_FLAG_DOTALL},
1363 {"re.UNICODE", SRE_FLAG_UNICODE},
1364 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1365 {"re.DEBUG", SRE_FLAG_DEBUG},
1366 {"re.ASCII", SRE_FLAG_ASCII},
1367 };
1368 PyObject *result = NULL;
1369 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001370 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001371 int flags = obj->flags;
1372
1373 /* Omit re.UNICODE for valid string patterns. */
1374 if (obj->isbytes == 0 &&
1375 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1376 SRE_FLAG_UNICODE)
1377 flags &= ~SRE_FLAG_UNICODE;
1378
1379 flag_items = PyList_New(0);
1380 if (!flag_items)
1381 return NULL;
1382
1383 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1384 if (flags & flag_names[i].value) {
1385 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1386 if (!item)
1387 goto done;
1388
1389 if (PyList_Append(flag_items, item) < 0) {
1390 Py_DECREF(item);
1391 goto done;
1392 }
1393 Py_DECREF(item);
1394 flags &= ~flag_names[i].value;
1395 }
1396 }
1397 if (flags) {
1398 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1399 if (!item)
1400 goto done;
1401
1402 if (PyList_Append(flag_items, item) < 0) {
1403 Py_DECREF(item);
1404 goto done;
1405 }
1406 Py_DECREF(item);
1407 }
1408
1409 if (PyList_Size(flag_items) > 0) {
1410 PyObject *flags_result;
1411 PyObject *sep = PyUnicode_FromString("|");
1412 if (!sep)
1413 goto done;
1414 flags_result = PyUnicode_Join(sep, flag_items);
1415 Py_DECREF(sep);
1416 if (!flags_result)
1417 goto done;
1418 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1419 obj->pattern, flags_result);
1420 Py_DECREF(flags_result);
1421 }
1422 else {
1423 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1424 }
1425
1426done:
1427 Py_DECREF(flag_items);
1428 return result;
1429}
1430
Raymond Hettinger94478742004-09-24 04:31:19 +00001431PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1432
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001433/* PatternObject's 'groupindex' method. */
1434static PyObject *
1435pattern_groupindex(PatternObject *self)
1436{
1437 return PyDictProxy_New(self->groupindex);
1438}
1439
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001440static int _validate(PatternObject *self); /* Forward */
1441
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001442/*[clinic input]
1443_sre.compile
1444
1445 pattern: object
1446 flags: int
1447 code: object(subclass_of='&PyList_Type')
1448 groups: Py_ssize_t
1449 groupindex: object
1450 indexgroup: object
1451
1452[clinic start generated code]*/
1453
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001454static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001455_sre_compile_impl(PyModuleDef *module, PyObject *pattern, int flags,
1456 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1457 PyObject *indexgroup)
1458/*[clinic end generated code: output=3004b293730bf309 input=7d059ec8ae1edb85]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001459{
1460 /* "compile" pattern descriptor to pattern object */
1461
1462 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001463 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001464
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001465 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001466 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001467 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1468 if (!self)
1469 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001470 self->weakreflist = NULL;
1471 self->pattern = NULL;
1472 self->groupindex = NULL;
1473 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001474
1475 self->codesize = n;
1476
1477 for (i = 0; i < n; i++) {
1478 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001479 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001480 self->code[i] = (SRE_CODE) value;
1481 if ((unsigned long) self->code[i] != value) {
1482 PyErr_SetString(PyExc_OverflowError,
1483 "regular expression code size limit exceeded");
1484 break;
1485 }
1486 }
1487
1488 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001489 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001490 return NULL;
1491 }
1492
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001493 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001494 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001495 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001496 else {
1497 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001498 int charsize;
1499 Py_buffer view;
1500 view.buf = NULL;
1501 if (!getstring(pattern, &p_length, &self->isbytes,
1502 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001503 Py_DECREF(self);
1504 return NULL;
1505 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001506 if (view.buf)
1507 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001508 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001509
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001510 Py_INCREF(pattern);
1511 self->pattern = pattern;
1512
1513 self->flags = flags;
1514
1515 self->groups = groups;
1516
1517 Py_XINCREF(groupindex);
1518 self->groupindex = groupindex;
1519
1520 Py_XINCREF(indexgroup);
1521 self->indexgroup = indexgroup;
1522
1523 self->weakreflist = NULL;
1524
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001525 if (!_validate(self)) {
1526 Py_DECREF(self);
1527 return NULL;
1528 }
1529
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001530 return (PyObject*) self;
1531}
1532
Guido van Rossumb700df92000-03-31 14:59:30 +00001533/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001534/* Code validation */
1535
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1557
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator; takes a parenthesized printf argument
   list, e.g. VTRACE(("x=%d\n", x)) */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the enclosing function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the enclosing function: they read
   through 'code' (bounded by 'end'), store into 'op', 'arg' or 'skip',
   advance 'code' by one, and FAIL if 'code' has already reached 'end'. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Like GET_ARG but for a skip count: additionally FAILs when the skip,
   reduced by 'adj', would reach past 'end'.  'adj' is parenthesized so
   that any expression can be passed safely. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001598
1599static int
1600_validate_charset(SRE_CODE *code, SRE_CODE *end)
1601{
1602 /* Some variables are manipulated by the macros above */
1603 SRE_CODE op;
1604 SRE_CODE arg;
1605 SRE_CODE offset;
1606 int i;
1607
1608 while (code < end) {
1609 GET_OP;
1610 switch (op) {
1611
1612 case SRE_OP_NEGATE:
1613 break;
1614
1615 case SRE_OP_LITERAL:
1616 GET_ARG;
1617 break;
1618
1619 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001620 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001621 GET_ARG;
1622 GET_ARG;
1623 break;
1624
1625 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001626 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001627 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001628 FAIL;
1629 code += offset;
1630 break;
1631
1632 case SRE_OP_BIGCHARSET:
1633 GET_ARG; /* Number of blocks */
1634 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001635 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001636 FAIL;
1637 /* Make sure that each byte points to a valid block */
1638 for (i = 0; i < 256; i++) {
1639 if (((unsigned char *)code)[i] >= arg)
1640 FAIL;
1641 }
1642 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001643 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001644 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001645 FAIL;
1646 code += offset;
1647 break;
1648
1649 case SRE_OP_CATEGORY:
1650 GET_ARG;
1651 switch (arg) {
1652 case SRE_CATEGORY_DIGIT:
1653 case SRE_CATEGORY_NOT_DIGIT:
1654 case SRE_CATEGORY_SPACE:
1655 case SRE_CATEGORY_NOT_SPACE:
1656 case SRE_CATEGORY_WORD:
1657 case SRE_CATEGORY_NOT_WORD:
1658 case SRE_CATEGORY_LINEBREAK:
1659 case SRE_CATEGORY_NOT_LINEBREAK:
1660 case SRE_CATEGORY_LOC_WORD:
1661 case SRE_CATEGORY_LOC_NOT_WORD:
1662 case SRE_CATEGORY_UNI_DIGIT:
1663 case SRE_CATEGORY_UNI_NOT_DIGIT:
1664 case SRE_CATEGORY_UNI_SPACE:
1665 case SRE_CATEGORY_UNI_NOT_SPACE:
1666 case SRE_CATEGORY_UNI_WORD:
1667 case SRE_CATEGORY_UNI_NOT_WORD:
1668 case SRE_CATEGORY_UNI_LINEBREAK:
1669 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1670 break;
1671 default:
1672 FAIL;
1673 }
1674 break;
1675
1676 default:
1677 FAIL;
1678
1679 }
1680 }
1681
1682 return 1;
1683}
1684
1685static int
1686_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1687{
1688 /* Some variables are manipulated by the macros above */
1689 SRE_CODE op;
1690 SRE_CODE arg;
1691 SRE_CODE skip;
1692
1693 VTRACE(("code=%p, end=%p\n", code, end));
1694
1695 if (code > end)
1696 FAIL;
1697
1698 while (code < end) {
1699 GET_OP;
1700 switch (op) {
1701
1702 case SRE_OP_MARK:
1703 /* We don't check whether marks are properly nested; the
1704 sre_match() code is robust even if they don't, and the worst
1705 you can get is nonsensical match results. */
1706 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001707 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001708 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1709 FAIL;
1710 }
1711 break;
1712
1713 case SRE_OP_LITERAL:
1714 case SRE_OP_NOT_LITERAL:
1715 case SRE_OP_LITERAL_IGNORE:
1716 case SRE_OP_NOT_LITERAL_IGNORE:
1717 GET_ARG;
1718 /* The arg is just a character, nothing to check */
1719 break;
1720
1721 case SRE_OP_SUCCESS:
1722 case SRE_OP_FAILURE:
1723 /* Nothing to check; these normally end the matching process */
1724 break;
1725
1726 case SRE_OP_AT:
1727 GET_ARG;
1728 switch (arg) {
1729 case SRE_AT_BEGINNING:
1730 case SRE_AT_BEGINNING_STRING:
1731 case SRE_AT_BEGINNING_LINE:
1732 case SRE_AT_END:
1733 case SRE_AT_END_LINE:
1734 case SRE_AT_END_STRING:
1735 case SRE_AT_BOUNDARY:
1736 case SRE_AT_NON_BOUNDARY:
1737 case SRE_AT_LOC_BOUNDARY:
1738 case SRE_AT_LOC_NON_BOUNDARY:
1739 case SRE_AT_UNI_BOUNDARY:
1740 case SRE_AT_UNI_NON_BOUNDARY:
1741 break;
1742 default:
1743 FAIL;
1744 }
1745 break;
1746
1747 case SRE_OP_ANY:
1748 case SRE_OP_ANY_ALL:
1749 /* These have no operands */
1750 break;
1751
1752 case SRE_OP_IN:
1753 case SRE_OP_IN_IGNORE:
1754 GET_SKIP;
1755 /* Stop 1 before the end; we check the FAILURE below */
1756 if (!_validate_charset(code, code+skip-2))
1757 FAIL;
1758 if (code[skip-2] != SRE_OP_FAILURE)
1759 FAIL;
1760 code += skip-1;
1761 break;
1762
1763 case SRE_OP_INFO:
1764 {
1765 /* A minimal info field is
1766 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1767 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1768 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001769 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001770 SRE_CODE *newcode;
1771 GET_SKIP;
1772 newcode = code+skip-1;
1773 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001774 GET_ARG;
1775 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001776 /* Check that only valid flags are present */
1777 if ((flags & ~(SRE_INFO_PREFIX |
1778 SRE_INFO_LITERAL |
1779 SRE_INFO_CHARSET)) != 0)
1780 FAIL;
1781 /* PREFIX and CHARSET are mutually exclusive */
1782 if ((flags & SRE_INFO_PREFIX) &&
1783 (flags & SRE_INFO_CHARSET))
1784 FAIL;
1785 /* LITERAL implies PREFIX */
1786 if ((flags & SRE_INFO_LITERAL) &&
1787 !(flags & SRE_INFO_PREFIX))
1788 FAIL;
1789 /* Validate the prefix */
1790 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001791 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001792 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001793 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001794 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001795 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001796 FAIL;
1797 code += prefix_len;
1798 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001799 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001800 FAIL;
1801 /* Each overlap value should be < prefix_len */
1802 for (i = 0; i < prefix_len; i++) {
1803 if (code[i] >= prefix_len)
1804 FAIL;
1805 }
1806 code += prefix_len;
1807 }
1808 /* Validate the charset */
1809 if (flags & SRE_INFO_CHARSET) {
1810 if (!_validate_charset(code, newcode-1))
1811 FAIL;
1812 if (newcode[-1] != SRE_OP_FAILURE)
1813 FAIL;
1814 code = newcode;
1815 }
1816 else if (code != newcode) {
1817 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1818 FAIL;
1819 }
1820 }
1821 break;
1822
1823 case SRE_OP_BRANCH:
1824 {
1825 SRE_CODE *target = NULL;
1826 for (;;) {
1827 GET_SKIP;
1828 if (skip == 0)
1829 break;
1830 /* Stop 2 before the end; we check the JUMP below */
1831 if (!_validate_inner(code, code+skip-3, groups))
1832 FAIL;
1833 code += skip-3;
1834 /* Check that it ends with a JUMP, and that each JUMP
1835 has the same target */
1836 GET_OP;
1837 if (op != SRE_OP_JUMP)
1838 FAIL;
1839 GET_SKIP;
1840 if (target == NULL)
1841 target = code+skip-1;
1842 else if (code+skip-1 != target)
1843 FAIL;
1844 }
1845 }
1846 break;
1847
1848 case SRE_OP_REPEAT_ONE:
1849 case SRE_OP_MIN_REPEAT_ONE:
1850 {
1851 SRE_CODE min, max;
1852 GET_SKIP;
1853 GET_ARG; min = arg;
1854 GET_ARG; max = arg;
1855 if (min > max)
1856 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001857 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001858 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001859 if (!_validate_inner(code, code+skip-4, groups))
1860 FAIL;
1861 code += skip-4;
1862 GET_OP;
1863 if (op != SRE_OP_SUCCESS)
1864 FAIL;
1865 }
1866 break;
1867
1868 case SRE_OP_REPEAT:
1869 {
1870 SRE_CODE min, max;
1871 GET_SKIP;
1872 GET_ARG; min = arg;
1873 GET_ARG; max = arg;
1874 if (min > max)
1875 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001876 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001877 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001878 if (!_validate_inner(code, code+skip-3, groups))
1879 FAIL;
1880 code += skip-3;
1881 GET_OP;
1882 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1883 FAIL;
1884 }
1885 break;
1886
1887 case SRE_OP_GROUPREF:
1888 case SRE_OP_GROUPREF_IGNORE:
1889 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001890 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001891 FAIL;
1892 break;
1893
1894 case SRE_OP_GROUPREF_EXISTS:
1895 /* The regex syntax for this is: '(?(group)then|else)', where
1896 'group' is either an integer group number or a group name,
1897 'then' and 'else' are sub-regexes, and 'else' is optional. */
1898 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001899 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001900 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001901 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001902 code--; /* The skip is relative to the first arg! */
1903 /* There are two possibilities here: if there is both a 'then'
1904 part and an 'else' part, the generated code looks like:
1905
1906 GROUPREF_EXISTS
1907 <group>
1908 <skipyes>
1909 ...then part...
1910 JUMP
1911 <skipno>
1912 (<skipyes> jumps here)
1913 ...else part...
1914 (<skipno> jumps here)
1915
1916 If there is only a 'then' part, it looks like:
1917
1918 GROUPREF_EXISTS
1919 <group>
1920 <skip>
1921 ...then part...
1922 (<skip> jumps here)
1923
1924 There is no direct way to decide which it is, and we don't want
1925 to allow arbitrary jumps anywhere in the code; so we just look
1926 for a JUMP opcode preceding our skip target.
1927 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001928 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001929 code[skip-3] == SRE_OP_JUMP)
1930 {
1931 VTRACE(("both then and else parts present\n"));
1932 if (!_validate_inner(code+1, code+skip-3, groups))
1933 FAIL;
1934 code += skip-2; /* Position after JUMP, at <skipno> */
1935 GET_SKIP;
1936 if (!_validate_inner(code, code+skip-1, groups))
1937 FAIL;
1938 code += skip-1;
1939 }
1940 else {
1941 VTRACE(("only a then part present\n"));
1942 if (!_validate_inner(code+1, code+skip-1, groups))
1943 FAIL;
1944 code += skip-1;
1945 }
1946 break;
1947
1948 case SRE_OP_ASSERT:
1949 case SRE_OP_ASSERT_NOT:
1950 GET_SKIP;
1951 GET_ARG; /* 0 for lookahead, width for lookbehind */
1952 code--; /* Back up over arg to simplify math below */
1953 if (arg & 0x80000000)
1954 FAIL; /* Width too large */
1955 /* Stop 1 before the end; we check the SUCCESS below */
1956 if (!_validate_inner(code+1, code+skip-2, groups))
1957 FAIL;
1958 code += skip-2;
1959 GET_OP;
1960 if (op != SRE_OP_SUCCESS)
1961 FAIL;
1962 break;
1963
1964 default:
1965 FAIL;
1966
1967 }
1968 }
1969
1970 VTRACE(("okay\n"));
1971 return 1;
1972}
1973
1974static int
1975_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1976{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001977 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1978 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001979 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001980 return _validate_inner(code, end-1, groups);
1981}
1982
1983static int
1984_validate(PatternObject *self)
1985{
1986 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1987 {
1988 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1989 return 0;
1990 }
1991 else
1992 VTRACE(("Success!\n"));
1993 return 1;
1994}
1995
1996/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001997/* match methods */
1998
1999static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002000match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002001{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002002 Py_XDECREF(self->regs);
2003 Py_XDECREF(self->string);
2004 Py_DECREF(self->pattern);
2005 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002006}
2007
2008static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002009match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002010{
Serhiy Storchaka25324972013-10-16 12:46:28 +03002011 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002012 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03002013 Py_buffer view;
2014 PyObject *result;
2015 void* ptr;
2016
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002017 if (index < 0 || index >= self->groups) {
2018 /* raise IndexError if we were given a bad group number */
2019 PyErr_SetString(
2020 PyExc_IndexError,
2021 "no such group"
2022 );
2023 return NULL;
2024 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002025
Fredrik Lundh6f013982000-07-03 18:44:21 +00002026 index *= 2;
2027
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002028 if (self->string == Py_None || self->mark[index] < 0) {
2029 /* return default value if the string or group is undefined */
2030 Py_INCREF(def);
2031 return def;
2032 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002033
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002034 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03002035 if (ptr == NULL)
2036 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002037 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03002038 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002039 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002040 PyBuffer_Release(&view);
2041 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002042}
2043
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002044static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002045match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002046{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002047 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002048
Guido van Rossumddefaf32007-01-14 03:31:43 +00002049 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002050 /* Default value */
2051 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002052
Christian Heimes217cfd12007-12-02 14:31:20 +00002053 if (PyLong_Check(index))
2054 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002055
Fredrik Lundh6f013982000-07-03 18:44:21 +00002056 i = -1;
2057
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002058 if (self->pattern->groupindex) {
2059 index = PyObject_GetItem(self->pattern->groupindex, index);
2060 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002061 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002062 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002063 Py_DECREF(index);
2064 } else
2065 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002066 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002067
2068 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002069}
2070
2071static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002072match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002073{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002074 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002075}
2076
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002077/*[clinic input]
2078_sre.SRE_Match.expand
2079
2080 template: object
2081
2082Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2083[clinic start generated code]*/
2084
2085static PyObject *
2086_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2087/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002088{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002089 /* delegate to Python code */
2090 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002091 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002092 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002093 );
2094}
2095
2096static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002097match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002098{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002099 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002100 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002101
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002102 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002103
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002104 switch (size) {
2105 case 0:
2106 result = match_getslice(self, Py_False, Py_None);
2107 break;
2108 case 1:
2109 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2110 break;
2111 default:
2112 /* fetch multiple items */
2113 result = PyTuple_New(size);
2114 if (!result)
2115 return NULL;
2116 for (i = 0; i < size; i++) {
2117 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002118 self, PyTuple_GET_ITEM(args, i), Py_None
2119 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002120 if (!item) {
2121 Py_DECREF(result);
2122 return NULL;
2123 }
2124 PyTuple_SET_ITEM(result, i, item);
2125 }
2126 break;
2127 }
2128 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002129}
2130
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002131/*[clinic input]
2132_sre.SRE_Match.groups
2133
2134 default: object = None
2135 Is used for groups that did not participate in the match.
2136
2137Return a tuple containing all the subgroups of the match, from 1.
2138[clinic start generated code]*/
2139
2140static PyObject *
2141_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2142/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002143{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002144 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002145 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002146
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002147 result = PyTuple_New(self->groups-1);
2148 if (!result)
2149 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002150
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002151 for (index = 1; index < self->groups; index++) {
2152 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002153 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002154 if (!item) {
2155 Py_DECREF(result);
2156 return NULL;
2157 }
2158 PyTuple_SET_ITEM(result, index-1, item);
2159 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002160
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002161 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002162}
2163
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002164/*[clinic input]
2165_sre.SRE_Match.groupdict
2166
2167 default: object = None
2168 Is used for groups that did not participate in the match.
2169
2170Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2171[clinic start generated code]*/
2172
2173static PyObject *
2174_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2175/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002176{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002177 PyObject* result;
2178 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002179 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002180
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002181 result = PyDict_New();
2182 if (!result || !self->pattern->groupindex)
2183 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002184
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002185 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002186 if (!keys)
2187 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002188
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002189 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002190 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002191 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002192 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002193 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002194 if (!key)
2195 goto failed;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002196 value = match_getslice(self, key, default_value);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002197 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002198 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002199 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002200 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002201 status = PyDict_SetItem(result, key, value);
2202 Py_DECREF(value);
2203 if (status < 0)
2204 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002205 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002206
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002208
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002209 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002210
2211failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002212 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002213 Py_DECREF(result);
2214 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002215}
2216
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002217/*[clinic input]
2218_sre.SRE_Match.start -> Py_ssize_t
2219
2220 group: object(c_default="NULL") = 0
2221 /
2222
2223Return index of the start of the substring matched by group.
2224[clinic start generated code]*/
2225
2226static Py_ssize_t
2227_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2228/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002229{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002230 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002231
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002232 if (index < 0 || index >= self->groups) {
2233 PyErr_SetString(
2234 PyExc_IndexError,
2235 "no such group"
2236 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002237 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002238 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002239
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002240 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002241 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002242}
2243
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002244/*[clinic input]
2245_sre.SRE_Match.end -> Py_ssize_t
2246
2247 group: object(c_default="NULL") = 0
2248 /
2249
2250Return index of the end of the substring matched by group.
2251[clinic start generated code]*/
2252
2253static Py_ssize_t
2254_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2255/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002256{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002257 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002258
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002259 if (index < 0 || index >= self->groups) {
2260 PyErr_SetString(
2261 PyExc_IndexError,
2262 "no such group"
2263 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002264 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002265 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002266
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002267 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002268 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002269}
2270
2271LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002272_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002273{
2274 PyObject* pair;
2275 PyObject* item;
2276
2277 pair = PyTuple_New(2);
2278 if (!pair)
2279 return NULL;
2280
Christian Heimes217cfd12007-12-02 14:31:20 +00002281 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002282 if (!item)
2283 goto error;
2284 PyTuple_SET_ITEM(pair, 0, item);
2285
Christian Heimes217cfd12007-12-02 14:31:20 +00002286 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002287 if (!item)
2288 goto error;
2289 PyTuple_SET_ITEM(pair, 1, item);
2290
2291 return pair;
2292
2293 error:
2294 Py_DECREF(pair);
2295 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002296}
2297
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002298/*[clinic input]
2299_sre.SRE_Match.span
2300
2301 group: object(c_default="NULL") = 0
2302 /
2303
2304For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2305[clinic start generated code]*/
2306
2307static PyObject *
2308_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2309/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002310{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002311 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002312
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002313 if (index < 0 || index >= self->groups) {
2314 PyErr_SetString(
2315 PyExc_IndexError,
2316 "no such group"
2317 );
2318 return NULL;
2319 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002320
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002321 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002322 return _pair(self->mark[index*2], self->mark[index*2+1]);
2323}
2324
2325static PyObject*
2326match_regs(MatchObject* self)
2327{
2328 PyObject* regs;
2329 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002330 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002331
2332 regs = PyTuple_New(self->groups);
2333 if (!regs)
2334 return NULL;
2335
2336 for (index = 0; index < self->groups; index++) {
2337 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2338 if (!item) {
2339 Py_DECREF(regs);
2340 return NULL;
2341 }
2342 PyTuple_SET_ITEM(regs, index, item);
2343 }
2344
2345 Py_INCREF(regs);
2346 self->regs = regs;
2347
2348 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002349}
2350
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002351/*[clinic input]
2352_sre.SRE_Match.__copy__
2353
2354[clinic start generated code]*/
2355
2356static PyObject *
2357_sre_SRE_Match___copy___impl(MatchObject *self)
2358/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002359{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002360#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002361 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002362 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002363
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002364 slots = 2 * (self->pattern->groups+1);
2365
2366 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2367 if (!copy)
2368 return NULL;
2369
2370 /* this value a constant, but any compiler should be able to
2371 figure that out all by itself */
2372 offset = offsetof(MatchObject, string);
2373
2374 Py_XINCREF(self->pattern);
2375 Py_XINCREF(self->string);
2376 Py_XINCREF(self->regs);
2377
2378 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002379 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002380
2381 return (PyObject*) copy;
2382#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002383 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002384 return NULL;
2385#endif
2386}
2387
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002388/*[clinic input]
2389_sre.SRE_Match.__deepcopy__
2390
2391 memo: object
2392
2393[clinic start generated code]*/
2394
2395static PyObject *
2396_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2397/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002398{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002399#ifdef USE_BUILTIN_COPY
2400 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002401
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002402 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002403 if (!copy)
2404 return NULL;
2405
2406 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2407 !deepcopy(&copy->string, memo) ||
2408 !deepcopy(&copy->regs, memo)) {
2409 Py_DECREF(copy);
2410 return NULL;
2411 }
2412
2413#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002414 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2415 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002416#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002417}
2418
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002419PyDoc_STRVAR(match_doc,
2420"The result of re.match() and re.search().\n\
2421Match objects always have a boolean value of True.");
2422
2423PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002424"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002425 Return subgroup(s) of the match by indices or names.\n\
2426 For 0 returns the entire match.");
2427
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002428static PyObject *
2429match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002430{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002431 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002432 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002433 Py_INCREF(Py_None);
2434 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002435}
2436
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002437static PyObject *
2438match_lastgroup_get(MatchObject *self)
2439{
2440 if (self->pattern->indexgroup && self->lastindex >= 0) {
2441 PyObject* result = PySequence_GetItem(
2442 self->pattern->indexgroup, self->lastindex
2443 );
2444 if (result)
2445 return result;
2446 PyErr_Clear();
2447 }
2448 Py_INCREF(Py_None);
2449 return Py_None;
2450}
2451
2452static PyObject *
2453match_regs_get(MatchObject *self)
2454{
2455 if (self->regs) {
2456 Py_INCREF(self->regs);
2457 return self->regs;
2458 } else
2459 return match_regs(self);
2460}
2461
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002462static PyObject *
2463match_repr(MatchObject *self)
2464{
2465 PyObject *result;
2466 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2467 if (group0 == NULL)
2468 return NULL;
2469 result = PyUnicode_FromFormat(
2470 "<%s object; span=(%d, %d), match=%.50R>",
2471 Py_TYPE(self)->tp_name,
2472 self->mark[0], self->mark[1], group0);
2473 Py_DECREF(group0);
2474 return result;
2475}
2476
2477
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002478static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002479pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002480{
2481 /* create match object (from state object) */
2482
2483 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002484 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002485 char* base;
2486 int n;
2487
2488 if (status > 0) {
2489
2490 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002491 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002492 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2493 2*(pattern->groups+1));
2494 if (!match)
2495 return NULL;
2496
2497 Py_INCREF(pattern);
2498 match->pattern = pattern;
2499
2500 Py_INCREF(state->string);
2501 match->string = state->string;
2502
2503 match->regs = NULL;
2504 match->groups = pattern->groups+1;
2505
2506 /* fill in group slices */
2507
2508 base = (char*) state->beginning;
2509 n = state->charsize;
2510
2511 match->mark[0] = ((char*) state->start - base) / n;
2512 match->mark[1] = ((char*) state->ptr - base) / n;
2513
2514 for (i = j = 0; i < pattern->groups; i++, j+=2)
2515 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2516 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2517 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2518 } else
2519 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2520
2521 match->pos = state->pos;
2522 match->endpos = state->endpos;
2523
2524 match->lastindex = state->lastindex;
2525
2526 return (PyObject*) match;
2527
2528 } else if (status == 0) {
2529
2530 /* no match */
2531 Py_INCREF(Py_None);
2532 return Py_None;
2533
2534 }
2535
2536 /* internal error */
2537 pattern_error(status);
2538 return NULL;
2539}
2540
2541
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002542/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002543/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002544
/* Destructor for scanner objects: finalizes the embedded search state,
   drops the reference to the pattern (which may still be NULL, hence
   Py_XDECREF) and frees the object itself. */
static void
scanner_dealloc(ScannerObject* self)
{
    state_fini(&self->state);
    Py_XDECREF(self->pattern);
    PyObject_DEL(self);
}
2552
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002553/*[clinic input]
2554_sre.SRE_Scanner.match
2555
2556[clinic start generated code]*/
2557
2558static PyObject *
2559_sre_SRE_Scanner_match_impl(ScannerObject *self)
2560/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002561{
2562 SRE_STATE* state = &self->state;
2563 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002564 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002565
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002566 state_reset(state);
2567
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002568 state->ptr = state->start;
2569
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002570 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002571 if (PyErr_Occurred())
2572 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002573
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002574 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002575 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002576
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002577 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002578 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002579 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002580 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002581
2582 return match;
2583}
2584
2585
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002586/*[clinic input]
2587_sre.SRE_Scanner.search
2588
2589[clinic start generated code]*/
2590
2591static PyObject *
2592_sre_SRE_Scanner_search_impl(ScannerObject *self)
2593/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002594{
2595 SRE_STATE* state = &self->state;
2596 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002597 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002598
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002599 state_reset(state);
2600
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002601 state->ptr = state->start;
2602
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002603 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002604 if (PyErr_Occurred())
2605 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002606
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002607 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002608 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002609
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002610 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002611 state->start = (void*) ((char*) state->ptr + state->charsize);
2612 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002613 state->start = state->ptr;
2614
2615 return match;
2616}
2617
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002618static PyObject *
2619pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002620{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002621 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002622
2623 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002624 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2625 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002626 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002627 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002628
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002629 /* create search state object */
2630 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2631 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002632 return NULL;
2633 }
2634
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002635 Py_INCREF(self);
2636 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002637
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002638 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002639}
2640
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002641#include "clinic/_sre.c.h"
2642
/* Method table for pattern objects (referenced by Pattern_Type).  Each
   *_METHODDEF macro supplies one complete table entry -- presumably
   defined in the generated clinic header included above; confirm there. */
static PyMethodDef pattern_methods[] = {
    _SRE_SRE_PATTERN_MATCH_METHODDEF
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
    _SRE_SRE_PATTERN_SUB_METHODDEF
    _SRE_SRE_PATTERN_SUBN_METHODDEF
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
    _SRE_SRE_PATTERN___COPY___METHODDEF
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2657
/* Computed attributes of pattern objects: "groupindex" is read-only (no
   setter) and is produced by pattern_groupindex(). */
static PyGetSetDef pattern_getset[] = {
    {"groupindex", (getter)pattern_groupindex, (setter)NULL,
      "A dictionary mapping group names to group numbers."},
    {NULL}  /* Sentinel */
};
2663
/* Read-only attributes served directly from PatternObject fields;
   PAT_OFF(x) is the byte offset of field x within the struct. */
#define PAT_OFF(x) offsetof(PatternObject, x)
static PyMemberDef pattern_members[] = {
    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
    {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
    {NULL}  /* Sentinel */
};
2671
/* Type object for compiled patterns.  Instances are variable-sized with
   items of sizeof(SRE_CODE) bytes, and support weak references through
   the weakreflist field. */
static PyTypeObject Pattern_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_" SRE_MODULE ".SRE_Pattern",      /* tp_name */
    sizeof(PatternObject), sizeof(SRE_CODE), /* tp_basicsize, tp_itemsize */
    (destructor)pattern_dealloc,        /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    (reprfunc)pattern_repr,             /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    pattern_doc,                        /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    pattern_methods,                    /* tp_methods */
    pattern_members,                    /* tp_members */
    pattern_getset,                     /* tp_getset */
};
2703
2704
/* Method table for match objects (referenced by Match_Type).  "group" is
   declared by hand; every other entry is supplied by a *_METHODDEF macro
   -- presumably from the generated clinic header; confirm there. */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
    _SRE_SRE_MATCH_START_METHODDEF
    _SRE_SRE_MATCH_END_METHODDEF
    _SRE_SRE_MATCH_SPAN_METHODDEF
    _SRE_SRE_MATCH_GROUPS_METHODDEF
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
    _SRE_SRE_MATCH_EXPAND_METHODDEF
    _SRE_SRE_MATCH___COPY___METHODDEF
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2717
/* Computed, read-only (no setter) attributes of match objects. */
static PyGetSetDef match_getset[] = {
    {"lastindex", (getter)match_lastindex_get, (setter)NULL},
    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
    {"regs",      (getter)match_regs_get,      (setter)NULL},
    {NULL}  /* Sentinel */
};
2724
/* Read-only attributes served directly from MatchObject fields;
   MATCH_OFF(x) is the byte offset of field x within the struct.  Note
   that the Python-level name "re" maps to the "pattern" field. */
#define MATCH_OFF(x) offsetof(MatchObject, x)
static PyMemberDef match_members[] = {
    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY},
    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY},
    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY},
    {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY},
    {NULL}  /* Sentinel */
};
2733
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2736
/* Type object for match objects.  Instances are variable-sized with
   items of sizeof(Py_ssize_t) bytes. */
static PyTypeObject Match_Type = {
    PyVarObject_HEAD_INIT(NULL,0)
    "_" SRE_MODULE ".SRE_Match",        /* tp_name */
    sizeof(MatchObject), sizeof(Py_ssize_t), /* tp_basicsize, tp_itemsize */
    (destructor)match_dealloc,  /* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_reserved */
    (reprfunc)match_repr,       /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    match_doc,                  /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    match_methods,              /* tp_methods */
    match_members,              /* tp_members */
    match_getset,               /* tp_getset */
};
2768
/* Method table for scanner objects (referenced by Scanner_Type); both
   entries are supplied by *_METHODDEF macros -- presumably from the
   generated clinic header; confirm there. */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2774
/* Read-only attribute served directly from the ScannerObject "pattern"
   field; SCAN_OFF(x) is the byte offset of field x within the struct. */
#define SCAN_OFF(x) offsetof(ScannerObject, x)
static PyMemberDef scanner_members[] = {
    {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
    {NULL}  /* Sentinel */
};
2780
/* Type object for scanner objects.  Fixed-size instances (item size 0),
   no repr and no docstring. */
static PyTypeObject Scanner_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_" SRE_MODULE ".SRE_Scanner",      /* tp_name */
    sizeof(ScannerObject), 0,   /* tp_basicsize, tp_itemsize */
    (destructor)scanner_dealloc,/* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_reserved */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    0,                          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    scanner_methods,            /* tp_methods */
    scanner_members,            /* tp_members */
    0,                          /* tp_getset */
};
2812
/* Module-level functions of the extension module (passed to the module
   definition below as its method table). */
static PyMethodDef _functions[] = {
    _SRE_COMPILE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF
    _SRE_GETLOWER_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2819
/* Module definition: named "_" SRE_MODULE, no docstring, no per-module
   state (size -1), with _functions as its method table.  The remaining
   optional slots are left NULL. */
static struct PyModuleDef sremodule = {
        PyModuleDef_HEAD_INIT,
        "_" SRE_MODULE,         /* m_name */
        NULL,                   /* m_doc */
        -1,                     /* m_size */
        _functions,             /* m_methods */
        NULL,
        NULL,
        NULL,
        NULL
};
2831
2832PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002833{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002834 PyObject* m;
2835 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002836 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002837
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002838 /* Patch object types */
2839 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2840 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002841 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002842
Martin v. Löwis1a214512008-06-11 05:26:20 +00002843 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002844 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002845 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002846 d = PyModule_GetDict(m);
2847
Christian Heimes217cfd12007-12-02 14:31:20 +00002848 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002849 if (x) {
2850 PyDict_SetItemString(d, "MAGIC", x);
2851 Py_DECREF(x);
2852 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002853
Christian Heimes217cfd12007-12-02 14:31:20 +00002854 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002855 if (x) {
2856 PyDict_SetItemString(d, "CODESIZE", x);
2857 Py_DECREF(x);
2858 }
2859
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002860 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2861 if (x) {
2862 PyDict_SetItemString(d, "MAXREPEAT", x);
2863 Py_DECREF(x);
2864 }
2865
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002866 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2867 if (x) {
2868 PyDict_SetItemString(d, "MAXGROUPS", x);
2869 Py_DECREF(x);
2870 }
2871
Neal Norwitzfe537132007-08-26 03:55:15 +00002872 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002873 if (x) {
2874 PyDict_SetItemString(d, "copyright", x);
2875 Py_DECREF(x);
2876 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002877 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002878}
2879
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002880/* vim:ts=4:sw=4:et
2881*/