blob: 8cad1ac270bc06c3b295ffa1829f3af78c4e72a5 [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02007 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
Guido van Rossumb700df92000-03-31 14:59:30 +000026 *
Fredrik Lundh770617b2001-01-14 15:06:11 +000027 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
Guido van Rossumb700df92000-03-31 14:59:30 +000028 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000029 * This version of the SRE library can be redistributed under CNRI's
30 * Python 1.6 license. For any other use, please contact Secret Labs
31 * AB (info@pythonware.com).
32 *
Guido van Rossumb700df92000-03-31 14:59:30 +000033 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000034 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000035 * other compatibility work.
36 */
37
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000038static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000039 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
65/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000066#define USE_FAST_SEARCH
67
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000068/* enables copy/deepcopy handling (work in progress) */
69#undef USE_BUILTIN_COPY
70
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000071/* -------------------------------------------------------------------- */
72
/* LOCAL(type) spells the declaration prefix used for file-private helper
   functions; the exact spelling depends on the compiler/configuration. */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
/* build explicitly asked for inline helpers */
#define LOCAL(type) static inline type
#else
/* plain file-scope static function */
#define LOCAL(type) static type
#endif
83
/* negative status values returned by the matching engine */
#define SRE_ERROR_ILLEGAL         (-1)  /* illegal opcode */
#define SRE_ERROR_STATE           (-2)  /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3)  /* runaway recursion */
#define SRE_ERROR_MEMORY          (-9)  /* out of memory */
#define SRE_ERROR_INTERRUPTED     (-10) /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000090
/* TRACE((fmt, ...)) prints through printf when VERBOSE is defined and
   expands to nothing otherwise (note the double parentheses). */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
96
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000097/* -------------------------------------------------------------------- */
98/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000099
/* ASCII-only character predicates: every code >= 128 is rejected, and each
   macro evaluates to 0 or 1. */
#define SRE_IS_DIGIT(ch) ((ch) < 128 ? (Py_ISDIGIT(ch) != 0) : 0)
#define SRE_IS_SPACE(ch) ((ch) < 128 ? (Py_ISSPACE(ch) != 0) : 0)
#define SRE_IS_LINEBREAK(ch) ('\n' == (ch))
#define SRE_IS_ALNUM(ch) ((ch) < 128 ? (Py_ISALNUM(ch) != 0) : 0)
#define SRE_IS_WORD(ch) ((ch) < 128 ? (Py_ISALNUM(ch) || (ch) == '_') : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000110
/* Lower-case an ASCII code; codes >= 128 are returned untouched. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch < 128)
        return Py_TOLOWER(ch);
    return ch;
}
115
/* Upper-case an ASCII code; codes >= 128 are returned untouched. */
static unsigned int sre_upper(unsigned int ch)
{
    if (ch < 128)
        return Py_TOUPPER(ch);
    return ch;
}
120
/* locale-specific character predicates */
/* For unsigned c, (c & ~N) is zero exactly when c < N+1; testing the mask
 * instead of comparing avoids compiler warnings when c's type cannot hold
 * values >= N+1.  Codes outside 0..255 never match. */
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
126
/* Lower-case `ch` with the C library's tolower(); codes >= 256 are
   returned untouched. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower((ch));
}
131
/* Upper-case `ch` with the C library's toupper(); codes >= 256 are
   returned untouched. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper((ch));
}
136
/* unicode-specific character predicates: thin aliases for the Py_UNICODE_*
   classification macros, plus a "word" test that also accepts '_' */

#define SRE_UNI_IS_DIGIT(ch) (Py_UNICODE_ISDECIMAL(ch))
#define SRE_UNI_IS_SPACE(ch) (Py_UNICODE_ISSPACE(ch))
#define SRE_UNI_IS_LINEBREAK(ch) (Py_UNICODE_ISLINEBREAK(ch))
#define SRE_UNI_IS_ALNUM(ch) (Py_UNICODE_ISALNUM(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000144
/* Lower-case `ch` via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
149
/* Upper-case `ch` via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
154
Guido van Rossumb700df92000-03-31 14:59:30 +0000155LOCAL(int)
156sre_category(SRE_CODE category, unsigned int ch)
157{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000158 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000160 case SRE_CATEGORY_DIGIT:
161 return SRE_IS_DIGIT(ch);
162 case SRE_CATEGORY_NOT_DIGIT:
163 return !SRE_IS_DIGIT(ch);
164 case SRE_CATEGORY_SPACE:
165 return SRE_IS_SPACE(ch);
166 case SRE_CATEGORY_NOT_SPACE:
167 return !SRE_IS_SPACE(ch);
168 case SRE_CATEGORY_WORD:
169 return SRE_IS_WORD(ch);
170 case SRE_CATEGORY_NOT_WORD:
171 return !SRE_IS_WORD(ch);
172 case SRE_CATEGORY_LINEBREAK:
173 return SRE_IS_LINEBREAK(ch);
174 case SRE_CATEGORY_NOT_LINEBREAK:
175 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000176
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000177 case SRE_CATEGORY_LOC_WORD:
178 return SRE_LOC_IS_WORD(ch);
179 case SRE_CATEGORY_LOC_NOT_WORD:
180 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000182 case SRE_CATEGORY_UNI_DIGIT:
183 return SRE_UNI_IS_DIGIT(ch);
184 case SRE_CATEGORY_UNI_NOT_DIGIT:
185 return !SRE_UNI_IS_DIGIT(ch);
186 case SRE_CATEGORY_UNI_SPACE:
187 return SRE_UNI_IS_SPACE(ch);
188 case SRE_CATEGORY_UNI_NOT_SPACE:
189 return !SRE_UNI_IS_SPACE(ch);
190 case SRE_CATEGORY_UNI_WORD:
191 return SRE_UNI_IS_WORD(ch);
192 case SRE_CATEGORY_UNI_NOT_WORD:
193 return !SRE_UNI_IS_WORD(ch);
194 case SRE_CATEGORY_UNI_LINEBREAK:
195 return SRE_UNI_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
197 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000198 }
199 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000200}
201
202/* helpers */
203
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000204static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000205data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000206{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000207 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000208 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000209 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000210 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000211 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000212}
213
214static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000215data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000217 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000218 minsize = state->data_stack_base+size;
219 cursize = state->data_stack_size;
220 if (cursize < minsize) {
221 void* stack;
222 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300223 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000224 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000226 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000227 return SRE_ERROR_MEMORY;
228 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000229 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000230 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000231 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000232 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000233}
234
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000235/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000236
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300237#define SRE_CHAR Py_UCS1
238#define SIZEOF_SRE_CHAR 1
239#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300240#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000241
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300242/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000243
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300244#define SRE_CHAR Py_UCS2
245#define SIZEOF_SRE_CHAR 2
246#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300247#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000248
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300249/* generate 32-bit unicode version */
250
251#define SRE_CHAR Py_UCS4
252#define SIZEOF_SRE_CHAR 4
253#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300254#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000255
256/* -------------------------------------------------------------------- */
257/* factories and destructors */
258
259/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100260static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600261static PyObject*pattern_scanner(PatternObject*, PyObject*, PyObject* kw);
Guido van Rossumb700df92000-03-31 14:59:30 +0000262
263static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000264sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +0000265{
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100266 return PyLong_FromSize_t(sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +0000267}
268
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000269static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +0000270sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000271{
272 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000273 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000274 return NULL;
275 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000276 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000277 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000278 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundhb389df32000-06-29 12:48:37 +0000279 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000280}
281
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000282LOCAL(void)
283state_reset(SRE_STATE* state)
284{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000285 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000286 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000287
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000288 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000289 state->lastindex = -1;
290
291 state->repeat = NULL;
292
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000293 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000294}
295
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000296static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200297getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300298 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600299 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000300{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000301 /* given a python object, return a data pointer, a length (in
302 characters), and a character size. return NULL if the object
303 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000304
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000305 /* Unicode objects do not support the buffer API. So, get the data
306 directly instead. */
307 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200308 if (PyUnicode_READY(string) == -1)
309 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200310 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200311 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300312 *p_isbytes = 0;
313 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000314 }
315
Victor Stinner0058b862011-09-29 03:27:47 +0200316 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300317 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200318 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300319 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000320 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000321
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300322 *p_length = view->len;
323 *p_charsize = 1;
324 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000325
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300326 if (view->buf == NULL) {
327 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
328 PyBuffer_Release(view);
329 view->buf = NULL;
330 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000331 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300332 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000333}
334
335LOCAL(PyObject*)
336state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000337 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000338{
339 /* prepare state object */
340
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000341 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300342 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000343 void* ptr;
344
345 memset(state, 0, sizeof(SRE_STATE));
346
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300347 state->mark = PyMem_New(void *, pattern->groups * 2);
348 if (!state->mark) {
349 PyErr_NoMemory();
350 goto err;
351 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000352 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000353 state->lastindex = -1;
354
Benjamin Petersone48944b2012-03-07 14:50:25 -0600355 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300356 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000357 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600358 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000359
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300360 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600361 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200362 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600363 goto err;
364 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300365 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600366 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200367 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600368 goto err;
369 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000370
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000371 /* adjust boundaries */
372 if (start < 0)
373 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000374 else if (start > length)
375 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000376
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000377 if (end < 0)
378 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000379 else if (end > length)
380 end = length;
381
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000383 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000384
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000385 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000386
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000387 state->start = (void*) ((char*) ptr + start * state->charsize);
388 state->end = (void*) ((char*) ptr + end * state->charsize);
389
390 Py_INCREF(string);
391 state->string = string;
392 state->pos = start;
393 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000394
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200395 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000396 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200397 state->upper = sre_upper_locale;
398 }
399 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000400 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200401 state->upper = sre_upper_unicode;
402 }
403 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000404 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200405 state->upper = sre_upper;
406 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000407
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000408 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600409 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300410 PyMem_Del(state->mark);
411 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600412 if (state->buffer.buf)
413 PyBuffer_Release(&state->buffer);
414 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000415}
416
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000417LOCAL(void)
418state_fini(SRE_STATE* state)
419{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600420 if (state->buffer.buf)
421 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000422 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000423 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300424 PyMem_Del(state->mark);
425 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000426}
427
/* Convert the pointer `member` into a character index: its byte distance
   from (state)->beginning divided by the character size. */
#define STATE_OFFSET(state, member) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
431
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000432LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300433getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300434 PyObject* string, Py_ssize_t start, Py_ssize_t end)
435{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300436 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300437 if (PyBytes_CheckExact(string) &&
438 start == 0 && end == PyBytes_GET_SIZE(string)) {
439 Py_INCREF(string);
440 return string;
441 }
442 return PyBytes_FromStringAndSize(
443 (const char *)ptr + start, end - start);
444 }
445 else {
446 return PyUnicode_Substring(string, start, end);
447 }
448}
449
450LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000451state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000452{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000453 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000454
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000455 index = (index - 1) * 2;
456
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000457 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000458 if (empty)
459 /* want empty string */
460 i = j = 0;
461 else {
462 Py_INCREF(Py_None);
463 return Py_None;
464 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000465 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000466 i = STATE_OFFSET(state, state->mark[index]);
467 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000468 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000469
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300470 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000471}
472
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000473static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100474pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000475{
476 switch (status) {
477 case SRE_ERROR_RECURSION_LIMIT:
478 PyErr_SetString(
479 PyExc_RuntimeError,
480 "maximum recursion limit exceeded"
481 );
482 break;
483 case SRE_ERROR_MEMORY:
484 PyErr_NoMemory();
485 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000486 case SRE_ERROR_INTERRUPTED:
487 /* An exception has already been raised, so let it fly */
488 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000489 default:
490 /* other error codes indicate compiler/engine bugs */
491 PyErr_SetString(
492 PyExc_RuntimeError,
493 "internal error in regular expression engine"
494 );
495 }
496}
497
Guido van Rossumb700df92000-03-31 14:59:30 +0000498static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000499pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000500{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000501 if (self->weakreflist != NULL)
502 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000503 Py_XDECREF(self->pattern);
504 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000505 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000506 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000507}
508
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300509LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300510sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300511{
512 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300513 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300514 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300515 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300516 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300517 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300518}
519
520LOCAL(Py_ssize_t)
521sre_search(SRE_STATE* state, SRE_CODE* pattern)
522{
523 if (state->charsize == 1)
524 return sre_ucs1_search(state, pattern);
525 if (state->charsize == 2)
526 return sre_ucs2_search(state, pattern);
527 assert(state->charsize == 4);
528 return sre_ucs4_search(state, pattern);
529}
530
Larry Hastings16c51912014-01-07 11:53:01 -0800531static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200532fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
533{
534 if (string2 != NULL) {
535 if (string != NULL) {
536 PyErr_Format(PyExc_TypeError,
537 "Argument given by name ('%s') and position (1)",
538 oldname);
539 return NULL;
540 }
541 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
542 "The '%s' keyword parameter name is deprecated. "
543 "Use 'string' instead.", oldname) < 0)
544 return NULL;
545 return string2;
546 }
547 if (string == NULL) {
548 PyErr_SetString(PyExc_TypeError,
549 "Required argument 'string' (pos 1) not found");
550 return NULL;
551 }
552 return string;
553}
Larry Hastings16c51912014-01-07 11:53:01 -0800554
555static PyObject *
Larry Hastings5c661892014-01-24 06:17:25 -0800556pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
Larry Hastings16c51912014-01-07 11:53:01 -0800557{
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200558 static char *_keywords[] = {"string", "pos", "endpos", "pattern", NULL};
559 PyObject *string = NULL;
Larry Hastings16c51912014-01-07 11:53:01 -0800560 Py_ssize_t pos = 0;
561 Py_ssize_t endpos = PY_SSIZE_T_MAX;
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200562 PyObject *pattern = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000563 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100564 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300565 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000566
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200567 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
568 "|Onn$O:match", _keywords,
569 &string, &pos, &endpos, &pattern))
570 return NULL;
571 string = fix_string_param(string, pattern, "pattern");
572 if (!string)
573 return NULL;
574 string = state_init(&state, (PatternObject *)self, string, pos, endpos);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 if (!string)
576 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000577
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000578 state.ptr = state.start;
579
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000580 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
581
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300582 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000583
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000584 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300585 if (PyErr_Occurred()) {
586 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000587 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300588 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000589
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300590 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000591 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300592 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000593}
594
595static PyObject*
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200596pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
597{
598 SRE_STATE state;
599 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300600 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200601
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200602 PyObject *string = NULL, *string2 = NULL;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200603 Py_ssize_t start = 0;
604 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200605 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200606 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:fullmatch", kwlist,
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200607 &string, &start, &end, &string2))
608 return NULL;
609
610 string = fix_string_param(string, string2, "pattern");
611 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200612 return NULL;
613
614 string = state_init(&state, self, string, start, end);
615 if (!string)
616 return NULL;
617
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200618 state.ptr = state.start;
619
620 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
621
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300622 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200623
624 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300625 if (PyErr_Occurred()) {
626 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200627 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300628 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200629
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300630 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200631 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300632 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200633}
634
635static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000636pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000637{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000638 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100639 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300640 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000641
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200642 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000643 Py_ssize_t start = 0;
644 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200645 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
646 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:search", kwlist,
647 &string, &start, &end, &string2))
648 return NULL;
649
650 string = fix_string_param(string, string2, "pattern");
651 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000652 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000653
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000654 string = state_init(&state, self, string, start, end);
655 if (!string)
656 return NULL;
657
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000658 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
659
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300660 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000661
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000662 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
663
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300664 if (PyErr_Occurred()) {
665 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000666 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300667 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000668
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300669 match = pattern_new_match(self, &state, status);
670 state_fini(&state);
671 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000672}
673
674static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000675call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000676{
677 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000678 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000679 PyObject* func;
680 PyObject* result;
681
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000682 if (!args)
683 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000684 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000685 if (!name)
686 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000687 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000688 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000689 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000690 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000691 func = PyObject_GetAttrString(mod, function);
692 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000693 if (!func)
694 return NULL;
695 result = PyObject_CallObject(func, args);
696 Py_DECREF(func);
697 Py_DECREF(args);
698 return result;
699}
700
#ifdef USE_BUILTIN_COPY
/*
 * Replace *object by the result of copy.deepcopy(*object, memo).
 *
 * On success the old reference held in *object is released, the new
 * one is stored in its place and 1 is returned.  On failure *object
 * is left untouched and 0 is returned (the exception comes from
 * call()).
 */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* replacement;

    replacement = call("copy", "deepcopy", PyTuple_Pack(2, *object, memo));
    if (replacement == NULL)
        return 0;

    Py_DECREF(*object);
    *object = replacement;

    return 1; /* success */
}
#endif
720
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000722pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000723{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000724 SRE_STATE state;
725 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100726 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000727 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000728
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200729 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000730 Py_ssize_t start = 0;
731 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200732 static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
733 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:findall", kwlist,
734 &string, &start, &end, &string2))
735 return NULL;
736
737 string = fix_string_param(string, string2, "source");
738 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000740
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000741 string = state_init(&state, self, string, start, end);
742 if (!string)
743 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000744
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000746 if (!list) {
747 state_fini(&state);
748 return NULL;
749 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000750
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000751 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000752
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000753 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000754
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000755 state_reset(&state);
756
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 state.ptr = state.start;
758
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300759 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300760 if (PyErr_Occurred())
761 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000762
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000763 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000764 if (status == 0)
765 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000766 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000767 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000768 }
Tim Peters3d563502006-01-21 02:47:53 +0000769
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000770 /* don't bother to build a match object */
771 switch (self->groups) {
772 case 0:
773 b = STATE_OFFSET(&state, state.start);
774 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300775 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300776 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000777 if (!item)
778 goto error;
779 break;
780 case 1:
781 item = state_getslice(&state, 1, string, 1);
782 if (!item)
783 goto error;
784 break;
785 default:
786 item = PyTuple_New(self->groups);
787 if (!item)
788 goto error;
789 for (i = 0; i < self->groups; i++) {
790 PyObject* o = state_getslice(&state, i+1, string, 1);
791 if (!o) {
792 Py_DECREF(item);
793 goto error;
794 }
795 PyTuple_SET_ITEM(item, i, o);
796 }
797 break;
798 }
799
800 status = PyList_Append(list, item);
801 Py_DECREF(item);
802 if (status < 0)
803 goto error;
804
805 if (state.ptr == state.start)
806 state.start = (void*) ((char*) state.ptr + state.charsize);
807 else
808 state.start = state.ptr;
809
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000810 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000811
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000812 state_fini(&state);
813 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000814
815error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000816 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000817 state_fini(&state);
818 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000819
Guido van Rossumb700df92000-03-31 14:59:30 +0000820}
821
Fredrik Lundh703ce812001-10-24 22:16:30 +0000822static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600823pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
Fredrik Lundh703ce812001-10-24 22:16:30 +0000824{
825 PyObject* scanner;
826 PyObject* search;
827 PyObject* iterator;
828
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600829 scanner = pattern_scanner(pattern, args, kw);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000830 if (!scanner)
831 return NULL;
832
833 search = PyObject_GetAttrString(scanner, "search");
834 Py_DECREF(scanner);
835 if (!search)
836 return NULL;
837
838 iterator = PyCallIter_New(search, Py_None);
839 Py_DECREF(search);
840
841 return iterator;
842}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000843
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000844static PyObject*
845pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
846{
847 SRE_STATE state;
848 PyObject* list;
849 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100850 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000851 Py_ssize_t n;
852 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000853 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000854
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200855 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000856 Py_ssize_t maxsplit = 0;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200857 static char* kwlist[] = { "string", "maxsplit", "source", NULL };
858 if (!PyArg_ParseTupleAndKeywords(args, kw, "|On$O:split", kwlist,
859 &string, &maxsplit, &string2))
860 return NULL;
861
862 string = fix_string_param(string, string2, "source");
863 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000864 return NULL;
865
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200866 assert(self->codesize != 0);
867 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
868 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
869 PyErr_SetString(PyExc_ValueError,
870 "split() requires a non-empty pattern match.");
871 return NULL;
872 }
873 if (PyErr_WarnEx(PyExc_FutureWarning,
874 "split() requires a non-empty pattern match.",
875 1) < 0)
876 return NULL;
877 }
878
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000879 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000880 if (!string)
881 return NULL;
882
883 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000884 if (!list) {
885 state_fini(&state);
886 return NULL;
887 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000888
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000889 n = 0;
890 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000891
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000892 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000893
894 state_reset(&state);
895
896 state.ptr = state.start;
897
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300898 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300899 if (PyErr_Occurred())
900 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000901
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000902 if (status <= 0) {
903 if (status == 0)
904 break;
905 pattern_error(status);
906 goto error;
907 }
Tim Peters3d563502006-01-21 02:47:53 +0000908
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000909 if (state.start == state.ptr) {
910 if (last == state.end)
911 break;
912 /* skip one character */
913 state.start = (void*) ((char*) state.ptr + state.charsize);
914 continue;
915 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000916
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000917 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300918 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000919 string, STATE_OFFSET(&state, last),
920 STATE_OFFSET(&state, state.start)
921 );
922 if (!item)
923 goto error;
924 status = PyList_Append(list, item);
925 Py_DECREF(item);
926 if (status < 0)
927 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000928
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000929 /* add groups (if any) */
930 for (i = 0; i < self->groups; i++) {
931 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000932 if (!item)
933 goto error;
934 status = PyList_Append(list, item);
935 Py_DECREF(item);
936 if (status < 0)
937 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000938 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000939
940 n = n + 1;
941
942 last = state.start = state.ptr;
943
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000944 }
945
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000946 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300947 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000948 string, STATE_OFFSET(&state, last), state.endpos
949 );
950 if (!item)
951 goto error;
952 status = PyList_Append(list, item);
953 Py_DECREF(item);
954 if (status < 0)
955 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000956
957 state_fini(&state);
958 return list;
959
960error:
961 Py_DECREF(list);
962 state_fini(&state);
963 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000964
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000965}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000966
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000967static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000968pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000969 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000970{
971 SRE_STATE state;
972 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300973 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000974 PyObject* item;
975 PyObject* filter;
976 PyObject* args;
977 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000978 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100979 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000980 Py_ssize_t n;
981 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300982 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000983 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600984 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000985
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000986 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000987 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000988 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000989 Py_INCREF(filter);
990 filter_is_callable = 1;
991 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000992 /* if not callable, check if it's a literal string */
993 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600994 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300995 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200996 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000997 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300998 if (charsize == 1)
999 literal = memchr(ptr, '\\', n) == NULL;
1000 else
1001 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001002 } else {
1003 PyErr_Clear();
1004 literal = 0;
1005 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001006 if (view.buf)
1007 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001008 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001009 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001010 Py_INCREF(filter);
1011 filter_is_callable = 0;
1012 } else {
1013 /* not a literal; hand it over to the template compiler */
1014 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001015 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001016 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001017 );
1018 if (!filter)
1019 return NULL;
1020 filter_is_callable = PyCallable_Check(filter);
1021 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001022 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001023
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001024 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001025 if (!string) {
1026 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001027 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001028 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001029
1030 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001031 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001032 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001033 state_fini(&state);
1034 return NULL;
1035 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001036
1037 n = i = 0;
1038
1039 while (!count || n < count) {
1040
1041 state_reset(&state);
1042
1043 state.ptr = state.start;
1044
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001045 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001046 if (PyErr_Occurred())
1047 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001048
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001049 if (status <= 0) {
1050 if (status == 0)
1051 break;
1052 pattern_error(status);
1053 goto error;
1054 }
Tim Peters3d563502006-01-21 02:47:53 +00001055
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001056 b = STATE_OFFSET(&state, state.start);
1057 e = STATE_OFFSET(&state, state.ptr);
1058
1059 if (i < b) {
1060 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001061 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001062 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001063 if (!item)
1064 goto error;
1065 status = PyList_Append(list, item);
1066 Py_DECREF(item);
1067 if (status < 0)
1068 goto error;
1069
1070 } else if (i == b && i == e && n > 0)
1071 /* ignore empty match on latest position */
1072 goto next;
1073
1074 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001075 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001076 match = pattern_new_match(self, &state, 1);
1077 if (!match)
1078 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001079 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001080 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001081 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001082 goto error;
1083 }
1084 item = PyObject_CallObject(filter, args);
1085 Py_DECREF(args);
1086 Py_DECREF(match);
1087 if (!item)
1088 goto error;
1089 } else {
1090 /* filter is literal string */
1091 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001092 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001093 }
1094
1095 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001096 if (item != Py_None) {
1097 status = PyList_Append(list, item);
1098 Py_DECREF(item);
1099 if (status < 0)
1100 goto error;
1101 }
Tim Peters3d563502006-01-21 02:47:53 +00001102
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001103 i = e;
1104 n = n + 1;
1105
1106next:
1107 /* move on */
1108 if (state.ptr == state.start)
1109 state.start = (void*) ((char*) state.ptr + state.charsize);
1110 else
1111 state.start = state.ptr;
1112
1113 }
1114
1115 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001116 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001117 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001118 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001119 if (!item)
1120 goto error;
1121 status = PyList_Append(list, item);
1122 Py_DECREF(item);
1123 if (status < 0)
1124 goto error;
1125 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001126
1127 state_fini(&state);
1128
Guido van Rossum4e173842001-12-07 04:25:10 +00001129 Py_DECREF(filter);
1130
Fredrik Lundhdac58492001-10-21 21:48:30 +00001131 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001132 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001133 if (!joiner) {
1134 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001135 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001136 }
1137 if (PyList_GET_SIZE(list) == 0) {
1138 Py_DECREF(list);
1139 item = joiner;
1140 }
1141 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001142 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001143 item = _PyBytes_Join(joiner, list);
1144 else
1145 item = PyUnicode_Join(joiner, list);
1146 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001147 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001148 if (!item)
1149 return NULL;
1150 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001151
1152 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001153 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001154
1155 return item;
1156
1157error:
1158 Py_DECREF(list);
1159 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001160 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001161 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001162
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001163}
1164
1165static PyObject*
1166pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
1167{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001168 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001169 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001170 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001171 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001172 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001173 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001174 return NULL;
1175
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001176 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001177}
1178
1179static PyObject*
1180pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
1181{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001182 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001183 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001184 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001185 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001186 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001187 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001188 return NULL;
1189
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001190 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001191}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001192
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001193static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001194pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001195{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001196#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001197 PatternObject* copy;
1198 int offset;
1199
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001200 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1201 if (!copy)
1202 return NULL;
1203
1204 offset = offsetof(PatternObject, groups);
1205
1206 Py_XINCREF(self->groupindex);
1207 Py_XINCREF(self->indexgroup);
1208 Py_XINCREF(self->pattern);
1209
1210 memcpy((char*) copy + offset, (char*) self + offset,
1211 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001212 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001213
1214 return (PyObject*) copy;
1215#else
1216 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1217 return NULL;
1218#endif
1219}
1220
1221static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001222pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001223{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001224#ifdef USE_BUILTIN_COPY
1225 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001226
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001227 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001228 if (!copy)
1229 return NULL;
1230
1231 if (!deepcopy(&copy->groupindex, memo) ||
1232 !deepcopy(&copy->indexgroup, memo) ||
1233 !deepcopy(&copy->pattern, memo)) {
1234 Py_DECREF(copy);
1235 return NULL;
1236 }
1237
1238#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001239 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1240 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001241#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001242}
1243
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001244static PyObject *
1245pattern_repr(PatternObject *obj)
1246{
1247 static const struct {
1248 const char *name;
1249 int value;
1250 } flag_names[] = {
1251 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1252 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1253 {"re.LOCALE", SRE_FLAG_LOCALE},
1254 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1255 {"re.DOTALL", SRE_FLAG_DOTALL},
1256 {"re.UNICODE", SRE_FLAG_UNICODE},
1257 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1258 {"re.DEBUG", SRE_FLAG_DEBUG},
1259 {"re.ASCII", SRE_FLAG_ASCII},
1260 };
1261 PyObject *result = NULL;
1262 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001263 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001264 int flags = obj->flags;
1265
1266 /* Omit re.UNICODE for valid string patterns. */
1267 if (obj->isbytes == 0 &&
1268 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1269 SRE_FLAG_UNICODE)
1270 flags &= ~SRE_FLAG_UNICODE;
1271
1272 flag_items = PyList_New(0);
1273 if (!flag_items)
1274 return NULL;
1275
1276 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1277 if (flags & flag_names[i].value) {
1278 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1279 if (!item)
1280 goto done;
1281
1282 if (PyList_Append(flag_items, item) < 0) {
1283 Py_DECREF(item);
1284 goto done;
1285 }
1286 Py_DECREF(item);
1287 flags &= ~flag_names[i].value;
1288 }
1289 }
1290 if (flags) {
1291 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1292 if (!item)
1293 goto done;
1294
1295 if (PyList_Append(flag_items, item) < 0) {
1296 Py_DECREF(item);
1297 goto done;
1298 }
1299 Py_DECREF(item);
1300 }
1301
1302 if (PyList_Size(flag_items) > 0) {
1303 PyObject *flags_result;
1304 PyObject *sep = PyUnicode_FromString("|");
1305 if (!sep)
1306 goto done;
1307 flags_result = PyUnicode_Join(sep, flag_items);
1308 Py_DECREF(sep);
1309 if (!flags_result)
1310 goto done;
1311 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1312 obj->pattern, flags_result);
1313 Py_DECREF(flags_result);
1314 }
1315 else {
1316 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1317 }
1318
1319done:
1320 Py_DECREF(flag_items);
1321 return result;
1322}
1323
/* Docstrings for the pattern object and its methods.  Each string is
   continued with backslash-newline, so the leading whitespace of a
   continuation line is part of the docstring text. */
Raymond Hettinger94478742004-09-24 04:31:19 +00001324PyDoc_STRVAR(pattern_match_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001325"match(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001326 Matches zero or more characters at the beginning of the string");
1327
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001328PyDoc_STRVAR(pattern_fullmatch_doc,
1329"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
1330 Matches against all of the string");
1331
Raymond Hettinger94478742004-09-24 04:31:19 +00001332PyDoc_STRVAR(pattern_search_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001333"search(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001334 Scan through string looking for a match, and return a corresponding\n\
Andrew Svetlov0b64c142012-12-25 18:48:54 +02001335 match object instance. Return None if no position in the string matches.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001336
1337PyDoc_STRVAR(pattern_split_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001338"split(string[, maxsplit = 0]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001339 Split string by the occurrences of pattern.");
1340
1341PyDoc_STRVAR(pattern_findall_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001342"findall(string[, pos[, endpos]]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001343 Return a list of all non-overlapping matches of pattern in string.");
1344
1345PyDoc_STRVAR(pattern_finditer_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001346"finditer(string[, pos[, endpos]]) -> iterator.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001347 Return an iterator over all non-overlapping matches for the \n\
1348 RE pattern in string. For each match, the iterator returns a\n\
1349 match object.");
1350
1351PyDoc_STRVAR(pattern_sub_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001352"sub(repl, string[, count = 0]) -> newstring.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001353 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00001354 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001355
1356PyDoc_STRVAR(pattern_subn_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001357"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001358 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
1359 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00001360 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001361
1362PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1363
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001364static PyMethodDef pattern_methods[] = {
Tim Peters3d563502006-01-21 02:47:53 +00001365 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001366 pattern_match_doc},
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001367 {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
1368 pattern_fullmatch_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001369 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001370 pattern_search_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001371 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001372 pattern_sub_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001373 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001374 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001375 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001376 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001377 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001378 pattern_findall_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001379 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001380 pattern_finditer_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001381 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001382 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
1383 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001384 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001385};
1386
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001387/* PatternObject's 'groupindex' method. */
1388static PyObject *
1389pattern_groupindex(PatternObject *self)
1390{
1391 return PyDictProxy_New(self->groupindex);
1392}
1393
1394static PyGetSetDef pattern_getset[] = {
1395 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
1396 "A dictionary mapping group names to group numbers."},
1397 {NULL} /* Sentinel */
1398};
1399
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00001400#define PAT_OFF(x) offsetof(PatternObject, x)
1401static PyMemberDef pattern_members[] = {
1402 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
1403 {"flags", T_INT, PAT_OFF(flags), READONLY},
1404 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00001405 {NULL} /* Sentinel */
1406};
Guido van Rossumb700df92000-03-31 14:59:30 +00001407
Neal Norwitz57c179c2006-03-22 07:18:02 +00001408static PyTypeObject Pattern_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001409 PyVarObject_HEAD_INIT(NULL, 0)
1410 "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001411 sizeof(PatternObject), sizeof(SRE_CODE),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001412 (destructor)pattern_dealloc, /* tp_dealloc */
1413 0, /* tp_print */
1414 0, /* tp_getattr */
1415 0, /* tp_setattr */
1416 0, /* tp_reserved */
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001417 (reprfunc)pattern_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001418 0, /* tp_as_number */
1419 0, /* tp_as_sequence */
1420 0, /* tp_as_mapping */
1421 0, /* tp_hash */
1422 0, /* tp_call */
1423 0, /* tp_str */
1424 0, /* tp_getattro */
1425 0, /* tp_setattro */
1426 0, /* tp_as_buffer */
1427 Py_TPFLAGS_DEFAULT, /* tp_flags */
1428 pattern_doc, /* tp_doc */
1429 0, /* tp_traverse */
1430 0, /* tp_clear */
1431 0, /* tp_richcompare */
1432 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
1433 0, /* tp_iter */
1434 0, /* tp_iternext */
1435 pattern_methods, /* tp_methods */
1436 pattern_members, /* tp_members */
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001437 pattern_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00001438};
1439
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001440static int _validate(PatternObject *self); /* Forward */
1441
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001442static PyObject *
1443_compile(PyObject* self_, PyObject* args)
1444{
1445 /* "compile" pattern descriptor to pattern object */
1446
1447 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001448 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001449
1450 PyObject* pattern;
1451 int flags = 0;
1452 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001453 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001454 PyObject* groupindex = NULL;
1455 PyObject* indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001456
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001457 if (!PyArg_ParseTuple(args, "OiO!nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001458 &PyList_Type, &code, &groups,
1459 &groupindex, &indexgroup))
1460 return NULL;
1461
1462 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001463 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001464 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1465 if (!self)
1466 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001467 self->weakreflist = NULL;
1468 self->pattern = NULL;
1469 self->groupindex = NULL;
1470 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001471
1472 self->codesize = n;
1473
1474 for (i = 0; i < n; i++) {
1475 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001476 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001477 self->code[i] = (SRE_CODE) value;
1478 if ((unsigned long) self->code[i] != value) {
1479 PyErr_SetString(PyExc_OverflowError,
1480 "regular expression code size limit exceeded");
1481 break;
1482 }
1483 }
1484
1485 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001486 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001487 return NULL;
1488 }
1489
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001490 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001491 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001492 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001493 else {
1494 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001495 int charsize;
1496 Py_buffer view;
1497 view.buf = NULL;
1498 if (!getstring(pattern, &p_length, &self->isbytes,
1499 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001500 Py_DECREF(self);
1501 return NULL;
1502 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001503 if (view.buf)
1504 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001505 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001506
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001507 Py_INCREF(pattern);
1508 self->pattern = pattern;
1509
1510 self->flags = flags;
1511
1512 self->groups = groups;
1513
1514 Py_XINCREF(groupindex);
1515 self->groupindex = groupindex;
1516
1517 Py_XINCREF(indexgroup);
1518 self->indexgroup = indexgroup;
1519
1520 self->weakreflist = NULL;
1521
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001522 if (!_validate(self)) {
1523 Py_DECREF(self);
1524 return NULL;
1525 }
1526
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001527 return (PyObject*) self;
1528}
1529
Guido van Rossumb700df92000-03-31 14:59:30 +00001530/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001531/* Code validation */
1532
1533/* To learn more about this code, have a look at the _compile() function in
1534 Lib/sre_compile.py. The validation functions below check the code array
1535 for conformance with the code patterns generated there.
1536
1537 The nice thing about the generated code is that it is position-independent:
1538 all jumps are relative jumps forward. Also, jumps don't cross each other:
1539 the target of a later jump is always earlier than the target of an earlier
1540 jump. IOW, this is okay:
1541
1542 J---------J-------T--------T
1543 \ \_____/ /
1544 \______________________/
1545
1546 but this is not:
1547
1548 J---------J-------T--------T
1549 \_________\_____/ /
1550 \____________/
1551
Serhiy Storchakaefa5a392013-10-27 08:04:58 +02001552 It also helps that SRE_CODE is always an unsigned type.
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001553*/
1554
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator; v is a parenthesized printf argument
   list, e.g. VTRACE(("x=%d\n", x)) */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the enclosing validator function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.
   These macros rely on locals of the enclosing function: 'code' (read
   cursor), 'end' (one past the last readable word) and the destination
   variable 'op', 'arg' or 'skip'.  Each one FAILs instead of reading at
   or beyond 'end'. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and check that (skip - adj) words, counted from the
   skip word itself, stay inside [code, end].  The subtraction is done in
   unsigned arithmetic, so a skip smaller than adj wraps to a huge value
   and is rejected too. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", (void *)code));                 \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, (void *)(code+skip))); \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001595
1596static int
1597_validate_charset(SRE_CODE *code, SRE_CODE *end)
1598{
1599 /* Some variables are manipulated by the macros above */
1600 SRE_CODE op;
1601 SRE_CODE arg;
1602 SRE_CODE offset;
1603 int i;
1604
1605 while (code < end) {
1606 GET_OP;
1607 switch (op) {
1608
1609 case SRE_OP_NEGATE:
1610 break;
1611
1612 case SRE_OP_LITERAL:
1613 GET_ARG;
1614 break;
1615
1616 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001617 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001618 GET_ARG;
1619 GET_ARG;
1620 break;
1621
1622 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001623 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001624 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001625 FAIL;
1626 code += offset;
1627 break;
1628
1629 case SRE_OP_BIGCHARSET:
1630 GET_ARG; /* Number of blocks */
1631 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001632 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001633 FAIL;
1634 /* Make sure that each byte points to a valid block */
1635 for (i = 0; i < 256; i++) {
1636 if (((unsigned char *)code)[i] >= arg)
1637 FAIL;
1638 }
1639 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001640 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001641 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001642 FAIL;
1643 code += offset;
1644 break;
1645
1646 case SRE_OP_CATEGORY:
1647 GET_ARG;
1648 switch (arg) {
1649 case SRE_CATEGORY_DIGIT:
1650 case SRE_CATEGORY_NOT_DIGIT:
1651 case SRE_CATEGORY_SPACE:
1652 case SRE_CATEGORY_NOT_SPACE:
1653 case SRE_CATEGORY_WORD:
1654 case SRE_CATEGORY_NOT_WORD:
1655 case SRE_CATEGORY_LINEBREAK:
1656 case SRE_CATEGORY_NOT_LINEBREAK:
1657 case SRE_CATEGORY_LOC_WORD:
1658 case SRE_CATEGORY_LOC_NOT_WORD:
1659 case SRE_CATEGORY_UNI_DIGIT:
1660 case SRE_CATEGORY_UNI_NOT_DIGIT:
1661 case SRE_CATEGORY_UNI_SPACE:
1662 case SRE_CATEGORY_UNI_NOT_SPACE:
1663 case SRE_CATEGORY_UNI_WORD:
1664 case SRE_CATEGORY_UNI_NOT_WORD:
1665 case SRE_CATEGORY_UNI_LINEBREAK:
1666 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1667 break;
1668 default:
1669 FAIL;
1670 }
1671 break;
1672
1673 default:
1674 FAIL;
1675
1676 }
1677 }
1678
1679 return 1;
1680}
1681
1682static int
1683_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1684{
1685 /* Some variables are manipulated by the macros above */
1686 SRE_CODE op;
1687 SRE_CODE arg;
1688 SRE_CODE skip;
1689
1690 VTRACE(("code=%p, end=%p\n", code, end));
1691
1692 if (code > end)
1693 FAIL;
1694
1695 while (code < end) {
1696 GET_OP;
1697 switch (op) {
1698
1699 case SRE_OP_MARK:
1700 /* We don't check whether marks are properly nested; the
1701 sre_match() code is robust even if they don't, and the worst
1702 you can get is nonsensical match results. */
1703 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001704 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001705 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1706 FAIL;
1707 }
1708 break;
1709
1710 case SRE_OP_LITERAL:
1711 case SRE_OP_NOT_LITERAL:
1712 case SRE_OP_LITERAL_IGNORE:
1713 case SRE_OP_NOT_LITERAL_IGNORE:
1714 GET_ARG;
1715 /* The arg is just a character, nothing to check */
1716 break;
1717
1718 case SRE_OP_SUCCESS:
1719 case SRE_OP_FAILURE:
1720 /* Nothing to check; these normally end the matching process */
1721 break;
1722
1723 case SRE_OP_AT:
1724 GET_ARG;
1725 switch (arg) {
1726 case SRE_AT_BEGINNING:
1727 case SRE_AT_BEGINNING_STRING:
1728 case SRE_AT_BEGINNING_LINE:
1729 case SRE_AT_END:
1730 case SRE_AT_END_LINE:
1731 case SRE_AT_END_STRING:
1732 case SRE_AT_BOUNDARY:
1733 case SRE_AT_NON_BOUNDARY:
1734 case SRE_AT_LOC_BOUNDARY:
1735 case SRE_AT_LOC_NON_BOUNDARY:
1736 case SRE_AT_UNI_BOUNDARY:
1737 case SRE_AT_UNI_NON_BOUNDARY:
1738 break;
1739 default:
1740 FAIL;
1741 }
1742 break;
1743
1744 case SRE_OP_ANY:
1745 case SRE_OP_ANY_ALL:
1746 /* These have no operands */
1747 break;
1748
1749 case SRE_OP_IN:
1750 case SRE_OP_IN_IGNORE:
1751 GET_SKIP;
1752 /* Stop 1 before the end; we check the FAILURE below */
1753 if (!_validate_charset(code, code+skip-2))
1754 FAIL;
1755 if (code[skip-2] != SRE_OP_FAILURE)
1756 FAIL;
1757 code += skip-1;
1758 break;
1759
1760 case SRE_OP_INFO:
1761 {
1762 /* A minimal info field is
1763 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1764 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1765 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001766 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001767 SRE_CODE *newcode;
1768 GET_SKIP;
1769 newcode = code+skip-1;
1770 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001771 GET_ARG;
1772 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001773 /* Check that only valid flags are present */
1774 if ((flags & ~(SRE_INFO_PREFIX |
1775 SRE_INFO_LITERAL |
1776 SRE_INFO_CHARSET)) != 0)
1777 FAIL;
1778 /* PREFIX and CHARSET are mutually exclusive */
1779 if ((flags & SRE_INFO_PREFIX) &&
1780 (flags & SRE_INFO_CHARSET))
1781 FAIL;
1782 /* LITERAL implies PREFIX */
1783 if ((flags & SRE_INFO_LITERAL) &&
1784 !(flags & SRE_INFO_PREFIX))
1785 FAIL;
1786 /* Validate the prefix */
1787 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001788 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001789 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001790 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001791 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001792 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001793 FAIL;
1794 code += prefix_len;
1795 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001796 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001797 FAIL;
1798 /* Each overlap value should be < prefix_len */
1799 for (i = 0; i < prefix_len; i++) {
1800 if (code[i] >= prefix_len)
1801 FAIL;
1802 }
1803 code += prefix_len;
1804 }
1805 /* Validate the charset */
1806 if (flags & SRE_INFO_CHARSET) {
1807 if (!_validate_charset(code, newcode-1))
1808 FAIL;
1809 if (newcode[-1] != SRE_OP_FAILURE)
1810 FAIL;
1811 code = newcode;
1812 }
1813 else if (code != newcode) {
1814 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1815 FAIL;
1816 }
1817 }
1818 break;
1819
1820 case SRE_OP_BRANCH:
1821 {
1822 SRE_CODE *target = NULL;
1823 for (;;) {
1824 GET_SKIP;
1825 if (skip == 0)
1826 break;
1827 /* Stop 2 before the end; we check the JUMP below */
1828 if (!_validate_inner(code, code+skip-3, groups))
1829 FAIL;
1830 code += skip-3;
1831 /* Check that it ends with a JUMP, and that each JUMP
1832 has the same target */
1833 GET_OP;
1834 if (op != SRE_OP_JUMP)
1835 FAIL;
1836 GET_SKIP;
1837 if (target == NULL)
1838 target = code+skip-1;
1839 else if (code+skip-1 != target)
1840 FAIL;
1841 }
1842 }
1843 break;
1844
1845 case SRE_OP_REPEAT_ONE:
1846 case SRE_OP_MIN_REPEAT_ONE:
1847 {
1848 SRE_CODE min, max;
1849 GET_SKIP;
1850 GET_ARG; min = arg;
1851 GET_ARG; max = arg;
1852 if (min > max)
1853 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001854 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001855 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001856 if (!_validate_inner(code, code+skip-4, groups))
1857 FAIL;
1858 code += skip-4;
1859 GET_OP;
1860 if (op != SRE_OP_SUCCESS)
1861 FAIL;
1862 }
1863 break;
1864
1865 case SRE_OP_REPEAT:
1866 {
1867 SRE_CODE min, max;
1868 GET_SKIP;
1869 GET_ARG; min = arg;
1870 GET_ARG; max = arg;
1871 if (min > max)
1872 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001873 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001874 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001875 if (!_validate_inner(code, code+skip-3, groups))
1876 FAIL;
1877 code += skip-3;
1878 GET_OP;
1879 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1880 FAIL;
1881 }
1882 break;
1883
1884 case SRE_OP_GROUPREF:
1885 case SRE_OP_GROUPREF_IGNORE:
1886 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001887 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001888 FAIL;
1889 break;
1890
1891 case SRE_OP_GROUPREF_EXISTS:
1892 /* The regex syntax for this is: '(?(group)then|else)', where
1893 'group' is either an integer group number or a group name,
1894 'then' and 'else' are sub-regexes, and 'else' is optional. */
1895 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001896 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001897 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001898 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001899 code--; /* The skip is relative to the first arg! */
1900 /* There are two possibilities here: if there is both a 'then'
1901 part and an 'else' part, the generated code looks like:
1902
1903 GROUPREF_EXISTS
1904 <group>
1905 <skipyes>
1906 ...then part...
1907 JUMP
1908 <skipno>
1909 (<skipyes> jumps here)
1910 ...else part...
1911 (<skipno> jumps here)
1912
1913 If there is only a 'then' part, it looks like:
1914
1915 GROUPREF_EXISTS
1916 <group>
1917 <skip>
1918 ...then part...
1919 (<skip> jumps here)
1920
1921 There is no direct way to decide which it is, and we don't want
1922 to allow arbitrary jumps anywhere in the code; so we just look
1923 for a JUMP opcode preceding our skip target.
1924 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001925 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001926 code[skip-3] == SRE_OP_JUMP)
1927 {
1928 VTRACE(("both then and else parts present\n"));
1929 if (!_validate_inner(code+1, code+skip-3, groups))
1930 FAIL;
1931 code += skip-2; /* Position after JUMP, at <skipno> */
1932 GET_SKIP;
1933 if (!_validate_inner(code, code+skip-1, groups))
1934 FAIL;
1935 code += skip-1;
1936 }
1937 else {
1938 VTRACE(("only a then part present\n"));
1939 if (!_validate_inner(code+1, code+skip-1, groups))
1940 FAIL;
1941 code += skip-1;
1942 }
1943 break;
1944
1945 case SRE_OP_ASSERT:
1946 case SRE_OP_ASSERT_NOT:
1947 GET_SKIP;
1948 GET_ARG; /* 0 for lookahead, width for lookbehind */
1949 code--; /* Back up over arg to simplify math below */
1950 if (arg & 0x80000000)
1951 FAIL; /* Width too large */
1952 /* Stop 1 before the end; we check the SUCCESS below */
1953 if (!_validate_inner(code+1, code+skip-2, groups))
1954 FAIL;
1955 code += skip-2;
1956 GET_OP;
1957 if (op != SRE_OP_SUCCESS)
1958 FAIL;
1959 break;
1960
1961 default:
1962 FAIL;
1963
1964 }
1965 }
1966
1967 VTRACE(("okay\n"));
1968 return 1;
1969}
1970
1971static int
1972_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1973{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001974 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1975 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001976 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001977 return _validate_inner(code, end-1, groups);
1978}
1979
1980static int
1981_validate(PatternObject *self)
1982{
1983 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1984 {
1985 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1986 return 0;
1987 }
1988 else
1989 VTRACE(("Success!\n"));
1990 return 1;
1991}
1992
1993/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001994/* match methods */
1995
1996static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001997match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001998{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001999 Py_XDECREF(self->regs);
2000 Py_XDECREF(self->string);
2001 Py_DECREF(self->pattern);
2002 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002003}
2004
2005static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002006match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002007{
Serhiy Storchaka25324972013-10-16 12:46:28 +03002008 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002009 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03002010 Py_buffer view;
2011 PyObject *result;
2012 void* ptr;
2013
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002014 if (index < 0 || index >= self->groups) {
2015 /* raise IndexError if we were given a bad group number */
2016 PyErr_SetString(
2017 PyExc_IndexError,
2018 "no such group"
2019 );
2020 return NULL;
2021 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002022
Fredrik Lundh6f013982000-07-03 18:44:21 +00002023 index *= 2;
2024
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002025 if (self->string == Py_None || self->mark[index] < 0) {
2026 /* return default value if the string or group is undefined */
2027 Py_INCREF(def);
2028 return def;
2029 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002030
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002031 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03002032 if (ptr == NULL)
2033 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002034 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03002035 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002036 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002037 PyBuffer_Release(&view);
2038 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002039}
2040
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002041static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002042match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002043{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002044 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002045
Guido van Rossumddefaf32007-01-14 03:31:43 +00002046 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002047 /* Default value */
2048 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002049
Christian Heimes217cfd12007-12-02 14:31:20 +00002050 if (PyLong_Check(index))
2051 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002052
Fredrik Lundh6f013982000-07-03 18:44:21 +00002053 i = -1;
2054
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002055 if (self->pattern->groupindex) {
2056 index = PyObject_GetItem(self->pattern->groupindex, index);
2057 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002058 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002059 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002060 Py_DECREF(index);
2061 } else
2062 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002063 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002064
2065 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002066}
2067
2068static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002069match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002070{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002071 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002072}
2073
2074static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002075match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002076{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002077 /* delegate to Python code */
2078 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002079 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002080 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002081 );
2082}
2083
2084static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002085match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002086{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002087 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002088 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002089
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002090 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002091
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002092 switch (size) {
2093 case 0:
2094 result = match_getslice(self, Py_False, Py_None);
2095 break;
2096 case 1:
2097 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2098 break;
2099 default:
2100 /* fetch multiple items */
2101 result = PyTuple_New(size);
2102 if (!result)
2103 return NULL;
2104 for (i = 0; i < size; i++) {
2105 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002106 self, PyTuple_GET_ITEM(args, i), Py_None
2107 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002108 if (!item) {
2109 Py_DECREF(result);
2110 return NULL;
2111 }
2112 PyTuple_SET_ITEM(result, i, item);
2113 }
2114 break;
2115 }
2116 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002117}
2118
2119static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002120match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002121{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002122 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002123 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002124
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002125 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002126 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00002127 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002128 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002129
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002130 result = PyTuple_New(self->groups-1);
2131 if (!result)
2132 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002133
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002134 for (index = 1; index < self->groups; index++) {
2135 PyObject* item;
2136 item = match_getslice_by_index(self, index, def);
2137 if (!item) {
2138 Py_DECREF(result);
2139 return NULL;
2140 }
2141 PyTuple_SET_ITEM(result, index-1, item);
2142 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002143
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002144 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002145}
2146
2147static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002148match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002149{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002150 PyObject* result;
2151 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002152 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002153
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002154 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002155 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002156 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002157 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002159 result = PyDict_New();
2160 if (!result || !self->pattern->groupindex)
2161 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002162
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002163 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002164 if (!keys)
2165 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002166
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002167 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002168 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002169 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002170 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002171 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002172 if (!key)
2173 goto failed;
2174 value = match_getslice(self, key, def);
2175 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002176 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002177 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002178 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002179 status = PyDict_SetItem(result, key, value);
2180 Py_DECREF(value);
2181 if (status < 0)
2182 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002183 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002184
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002185 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002186
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002187 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002188
2189failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002190 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002191 Py_DECREF(result);
2192 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002193}
2194
2195static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002196match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002197{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002198 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002199
Guido van Rossumddefaf32007-01-14 03:31:43 +00002200 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002201 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002202 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002203
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002204 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002205
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002206 if (index < 0 || index >= self->groups) {
2207 PyErr_SetString(
2208 PyExc_IndexError,
2209 "no such group"
2210 );
2211 return NULL;
2212 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002213
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002214 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002215 return PyLong_FromSsize_t(self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002216}
2217
2218static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002219match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002220{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002221 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002222
Guido van Rossumddefaf32007-01-14 03:31:43 +00002223 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002224 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002225 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002226
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002227 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002228
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002229 if (index < 0 || index >= self->groups) {
2230 PyErr_SetString(
2231 PyExc_IndexError,
2232 "no such group"
2233 );
2234 return NULL;
2235 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002236
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002237 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002238 return PyLong_FromSsize_t(self->mark[index*2+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002239}
2240
2241LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002242_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002243{
2244 PyObject* pair;
2245 PyObject* item;
2246
2247 pair = PyTuple_New(2);
2248 if (!pair)
2249 return NULL;
2250
Christian Heimes217cfd12007-12-02 14:31:20 +00002251 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002252 if (!item)
2253 goto error;
2254 PyTuple_SET_ITEM(pair, 0, item);
2255
Christian Heimes217cfd12007-12-02 14:31:20 +00002256 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002257 if (!item)
2258 goto error;
2259 PyTuple_SET_ITEM(pair, 1, item);
2260
2261 return pair;
2262
2263 error:
2264 Py_DECREF(pair);
2265 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002266}
2267
2268static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002269match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002270{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002271 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002272
Guido van Rossumddefaf32007-01-14 03:31:43 +00002273 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002274 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002275 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002276
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002277 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002278
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002279 if (index < 0 || index >= self->groups) {
2280 PyErr_SetString(
2281 PyExc_IndexError,
2282 "no such group"
2283 );
2284 return NULL;
2285 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002286
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002287 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002288 return _pair(self->mark[index*2], self->mark[index*2+1]);
2289}
2290
2291static PyObject*
2292match_regs(MatchObject* self)
2293{
2294 PyObject* regs;
2295 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002296 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002297
2298 regs = PyTuple_New(self->groups);
2299 if (!regs)
2300 return NULL;
2301
2302 for (index = 0; index < self->groups; index++) {
2303 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2304 if (!item) {
2305 Py_DECREF(regs);
2306 return NULL;
2307 }
2308 PyTuple_SET_ITEM(regs, index, item);
2309 }
2310
2311 Py_INCREF(regs);
2312 self->regs = regs;
2313
2314 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002315}
2316
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002317static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002318match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002319{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002320#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002321 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002322 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002323
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002324 slots = 2 * (self->pattern->groups+1);
2325
2326 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2327 if (!copy)
2328 return NULL;
2329
2330 /* this value a constant, but any compiler should be able to
2331 figure that out all by itself */
2332 offset = offsetof(MatchObject, string);
2333
2334 Py_XINCREF(self->pattern);
2335 Py_XINCREF(self->string);
2336 Py_XINCREF(self->regs);
2337
2338 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002339 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002340
2341 return (PyObject*) copy;
2342#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002343 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002344 return NULL;
2345#endif
2346}
2347
2348static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002349match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002350{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002351#ifdef USE_BUILTIN_COPY
2352 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002353
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002354 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002355 if (!copy)
2356 return NULL;
2357
2358 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2359 !deepcopy(&copy->string, memo) ||
2360 !deepcopy(&copy->regs, memo)) {
2361 Py_DECREF(copy);
2362 return NULL;
2363 }
2364
2365#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002366 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2367 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002368#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002369}
2370
/* Docstrings for the match type (match_doc, used as Match_Type.tp_doc)
   and for the entries of match_methods[].  The continuation lines are
   part of the string literals, so their leading whitespace is emitted
   verbatim in the docstring text. */
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002371PyDoc_STRVAR(match_doc,
2372"The result of re.match() and re.search().\n\
2373Match objects always have a boolean value of True.");
2374
2375PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002376"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002377 Return subgroup(s) of the match by indices or names.\n\
2378 For 0 returns the entire match.");
2379
2380PyDoc_STRVAR(match_start_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002381"start([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002382 Return index of the start of the substring matched by group.");
2383
2384PyDoc_STRVAR(match_end_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002385"end([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002386 Return index of the end of the substring matched by group.");
2387
2388PyDoc_STRVAR(match_span_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002389"span([group]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002390 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
2391
2392PyDoc_STRVAR(match_groups_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002393"groups([default=None]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002394 Return a tuple containing all the subgroups of the match, from 1.\n\
2395 The default argument is used for groups\n\
2396 that did not participate in the match");
2397
2398PyDoc_STRVAR(match_groupdict_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002399"groupdict([default=None]) -> dict.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002400 Return a dictionary containing all the named subgroups of the match,\n\
2401 keyed by the subgroup name. The default argument is used for groups\n\
2402 that did not participate in the match");
2403
2404PyDoc_STRVAR(match_expand_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002405"expand(template) -> str.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002406 Return the string obtained by doing backslash substitution\n\
2407 on the string template, as done by the sub() method.");
2408
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002409static PyMethodDef match_methods[] = {
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002410 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2411 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
2412 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
2413 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
2414 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
2415 match_groups_doc},
2416 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
2417 match_groupdict_doc},
2418 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002419 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
2420 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002421 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002422};
2423
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002424static PyObject *
2425match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002426{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002427 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002428 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002429 Py_INCREF(Py_None);
2430 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002431}
2432
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002433static PyObject *
2434match_lastgroup_get(MatchObject *self)
2435{
2436 if (self->pattern->indexgroup && self->lastindex >= 0) {
2437 PyObject* result = PySequence_GetItem(
2438 self->pattern->indexgroup, self->lastindex
2439 );
2440 if (result)
2441 return result;
2442 PyErr_Clear();
2443 }
2444 Py_INCREF(Py_None);
2445 return Py_None;
2446}
2447
2448static PyObject *
2449match_regs_get(MatchObject *self)
2450{
2451 if (self->regs) {
2452 Py_INCREF(self->regs);
2453 return self->regs;
2454 } else
2455 return match_regs(self);
2456}
2457
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002458static PyObject *
2459match_repr(MatchObject *self)
2460{
2461 PyObject *result;
2462 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2463 if (group0 == NULL)
2464 return NULL;
2465 result = PyUnicode_FromFormat(
2466 "<%s object; span=(%d, %d), match=%.50R>",
2467 Py_TYPE(self)->tp_name,
2468 self->mark[0], self->mark[1], group0);
2469 Py_DECREF(group0);
2470 return result;
2471}
2472
2473
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002474static PyGetSetDef match_getset[] = {
2475 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2476 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2477 {"regs", (getter)match_regs_get, (setter)NULL},
2478 {NULL}
2479};
2480
2481#define MATCH_OFF(x) offsetof(MatchObject, x)
2482static PyMemberDef match_members[] = {
2483 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2484 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2485 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2486 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2487 {NULL}
2488};
2489
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2492
Neal Norwitz57c179c2006-03-22 07:18:02 +00002493static PyTypeObject Match_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002494 PyVarObject_HEAD_INIT(NULL,0)
2495 "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002496 sizeof(MatchObject), sizeof(Py_ssize_t),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002497 (destructor)match_dealloc, /* tp_dealloc */
2498 0, /* tp_print */
2499 0, /* tp_getattr */
2500 0, /* tp_setattr */
2501 0, /* tp_reserved */
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002502 (reprfunc)match_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002503 0, /* tp_as_number */
2504 0, /* tp_as_sequence */
2505 0, /* tp_as_mapping */
2506 0, /* tp_hash */
2507 0, /* tp_call */
2508 0, /* tp_str */
2509 0, /* tp_getattro */
2510 0, /* tp_setattro */
2511 0, /* tp_as_buffer */
2512 Py_TPFLAGS_DEFAULT, /* tp_flags */
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002513 match_doc, /* tp_doc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002514 0, /* tp_traverse */
2515 0, /* tp_clear */
2516 0, /* tp_richcompare */
2517 0, /* tp_weaklistoffset */
2518 0, /* tp_iter */
2519 0, /* tp_iternext */
2520 match_methods, /* tp_methods */
2521 match_members, /* tp_members */
2522 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00002523};
2524
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002525static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002526pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002527{
2528 /* create match object (from state object) */
2529
2530 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002531 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002532 char* base;
2533 int n;
2534
2535 if (status > 0) {
2536
2537 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002538 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002539 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2540 2*(pattern->groups+1));
2541 if (!match)
2542 return NULL;
2543
2544 Py_INCREF(pattern);
2545 match->pattern = pattern;
2546
2547 Py_INCREF(state->string);
2548 match->string = state->string;
2549
2550 match->regs = NULL;
2551 match->groups = pattern->groups+1;
2552
2553 /* fill in group slices */
2554
2555 base = (char*) state->beginning;
2556 n = state->charsize;
2557
2558 match->mark[0] = ((char*) state->start - base) / n;
2559 match->mark[1] = ((char*) state->ptr - base) / n;
2560
2561 for (i = j = 0; i < pattern->groups; i++, j+=2)
2562 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2563 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2564 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2565 } else
2566 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2567
2568 match->pos = state->pos;
2569 match->endpos = state->endpos;
2570
2571 match->lastindex = state->lastindex;
2572
2573 return (PyObject*) match;
2574
2575 } else if (status == 0) {
2576
2577 /* no match */
2578 Py_INCREF(Py_None);
2579 return Py_None;
2580
2581 }
2582
2583 /* internal error */
2584 pattern_error(status);
2585 return NULL;
2586}
2587
2588
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002589/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002590/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002591
2592static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002593scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002594{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002595 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002596 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002597 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002598}
2599
2600static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002601scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002602{
2603 SRE_STATE* state = &self->state;
2604 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002605 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002606
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002607 state_reset(state);
2608
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002609 state->ptr = state->start;
2610
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002611 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002612 if (PyErr_Occurred())
2613 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002614
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002615 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002616 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002617
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002618 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002619 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002620 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002621 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002622
2623 return match;
2624}
2625
2626
2627static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002628scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002629{
2630 SRE_STATE* state = &self->state;
2631 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002632 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002633
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002634 state_reset(state);
2635
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002636 state->ptr = state->start;
2637
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002638 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002639 if (PyErr_Occurred())
2640 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002641
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002642 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002643 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002644
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002645 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002646 state->start = (void*) ((char*) state->ptr + state->charsize);
2647 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002648 state->start = state->ptr;
2649
2650 return match;
2651}
2652
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002653static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002654 {"match", (PyCFunction) scanner_match, METH_NOARGS},
2655 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002656 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002657};
2658
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002659#define SCAN_OFF(x) offsetof(ScannerObject, x)
2660static PyMemberDef scanner_members[] = {
Ezio Melotti7c8c1ea2011-09-29 01:00:19 +03002661 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002662 {NULL} /* Sentinel */
2663};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002664
Neal Norwitz57c179c2006-03-22 07:18:02 +00002665static PyTypeObject Scanner_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002666 PyVarObject_HEAD_INIT(NULL, 0)
2667 "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002668 sizeof(ScannerObject), 0,
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002669 (destructor)scanner_dealloc,/* tp_dealloc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002670 0, /* tp_print */
2671 0, /* tp_getattr */
2672 0, /* tp_setattr */
2673 0, /* tp_reserved */
2674 0, /* tp_repr */
2675 0, /* tp_as_number */
2676 0, /* tp_as_sequence */
2677 0, /* tp_as_mapping */
2678 0, /* tp_hash */
2679 0, /* tp_call */
2680 0, /* tp_str */
2681 0, /* tp_getattro */
2682 0, /* tp_setattro */
2683 0, /* tp_as_buffer */
2684 Py_TPFLAGS_DEFAULT, /* tp_flags */
2685 0, /* tp_doc */
2686 0, /* tp_traverse */
2687 0, /* tp_clear */
2688 0, /* tp_richcompare */
2689 0, /* tp_weaklistoffset */
2690 0, /* tp_iter */
2691 0, /* tp_iternext */
2692 scanner_methods, /* tp_methods */
2693 scanner_members, /* tp_members */
2694 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002695};
2696
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002697static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002698pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kw)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002699{
2700 /* create search state object */
2701
2702 ScannerObject* self;
2703
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02002704 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002705 Py_ssize_t start = 0;
2706 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02002707 static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
2708 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:scanner", kwlist,
2709 &string, &start, &end, &string2))
2710 return NULL;
2711
2712 string = fix_string_param(string, string2, "source");
2713 if (!string)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002714 return NULL;
2715
2716 /* create scanner object */
2717 self = PyObject_NEW(ScannerObject, &Scanner_Type);
2718 if (!self)
2719 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002720 self->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002721
2722 string = state_init(&self->state, pattern, string, start, end);
2723 if (!string) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002724 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002725 return NULL;
2726 }
2727
2728 Py_INCREF(pattern);
2729 self->pattern = (PyObject*) pattern;
2730
2731 return (PyObject*) self;
2732}
2733
Guido van Rossumb700df92000-03-31 14:59:30 +00002734static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002735 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002736 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00002737 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002738 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002739};
2740
Martin v. Löwis1a214512008-06-11 05:26:20 +00002741static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002742 PyModuleDef_HEAD_INIT,
2743 "_" SRE_MODULE,
2744 NULL,
2745 -1,
2746 _functions,
2747 NULL,
2748 NULL,
2749 NULL,
2750 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002751};
2752
2753PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002754{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002755 PyObject* m;
2756 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002757 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002758
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002759 /* Patch object types */
2760 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2761 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002762 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002763
Martin v. Löwis1a214512008-06-11 05:26:20 +00002764 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002765 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002766 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002767 d = PyModule_GetDict(m);
2768
Christian Heimes217cfd12007-12-02 14:31:20 +00002769 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002770 if (x) {
2771 PyDict_SetItemString(d, "MAGIC", x);
2772 Py_DECREF(x);
2773 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002774
Christian Heimes217cfd12007-12-02 14:31:20 +00002775 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002776 if (x) {
2777 PyDict_SetItemString(d, "CODESIZE", x);
2778 Py_DECREF(x);
2779 }
2780
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002781 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2782 if (x) {
2783 PyDict_SetItemString(d, "MAXREPEAT", x);
2784 Py_DECREF(x);
2785 }
2786
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002787 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2788 if (x) {
2789 PyDict_SetItemString(d, "MAXGROUPS", x);
2790 Py_DECREF(x);
2791 }
2792
Neal Norwitzfe537132007-08-26 03:55:15 +00002793 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002794 if (x) {
2795 PyDict_SetItemString(d, "copyright", x);
2796 Py_DECREF(x);
2797 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002798 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002799}
2800
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002801/* vim:ts=4:sw=4:et
2802*/