/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000038static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000039 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
65/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000066#define USE_FAST_SEARCH
67
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000068/* enables copy/deepcopy handling (work in progress) */
69#undef USE_BUILTIN_COPY
70
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000071/* -------------------------------------------------------------------- */
72
Fredrik Lundh80946112000-06-29 18:03:25 +000073#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000074#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000075#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000076/* fastest possible local call under MSVC */
77#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000078#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000079#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000080#else
81#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000082#endif
83
84/* error codes */
85#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000086#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000087#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000088#define SRE_ERROR_MEMORY -9 /* out of memory */
Christian Heimes2380ac72008-01-09 00:17:24 +000089#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000090
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000091#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000092#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000093#else
94#define TRACE(v)
95#endif
96
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000097/* -------------------------------------------------------------------- */
98/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000099
/* default (non-locale, non-unicode) character predicates.  Except for the
 * line-break test, every predicate rejects code points >= 128 before
 * consulting the Py_IS* classification macros. */
#define SRE_IS_DIGIT(ch)     ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)     ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_ALNUM(ch)     ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch)      ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +0000110
/* Lower-case a code point with Py_TOLOWER; code points >= 128 are
 * returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
115
/* Upper-case a code point with Py_TOUPPER; code points >= 128 are
 * returned unchanged. */
static unsigned int sre_upper(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOUPPER(ch);
}
120
/* locale-specific character predicates */
/* ((c & ~N) == 0) is the same as (c < N+1) for any unsigned c; spelling
 * the range test with a mask avoids warnings when c's type can only hold
 * numbers < N+1 */
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
126
/* Lower-case a code point with the C library's tolower(); code points
 * >= 256 are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower(ch);
}
131
/* Upper-case a code point with the C library's toupper(); code points
 * >= 256 are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper(ch);
}
136
/* unicode-specific character predicates: thin aliases for the
 * Py_UNICODE_IS* classification macros, plus '_' for the word class */

#define SRE_UNI_IS_DIGIT(ch)     (Py_UNICODE_ISDECIMAL(ch))
#define SRE_UNI_IS_SPACE(ch)     (Py_UNICODE_ISSPACE(ch))
#define SRE_UNI_IS_LINEBREAK(ch) (Py_UNICODE_ISLINEBREAK(ch))
#define SRE_UNI_IS_ALNUM(ch)     (Py_UNICODE_ISALNUM(ch))
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000144
/* Lower-case a code point using Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
149
/* Upper-case a code point using Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
154
Guido van Rossumb700df92000-03-31 14:59:30 +0000155LOCAL(int)
156sre_category(SRE_CODE category, unsigned int ch)
157{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000158 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000160 case SRE_CATEGORY_DIGIT:
161 return SRE_IS_DIGIT(ch);
162 case SRE_CATEGORY_NOT_DIGIT:
163 return !SRE_IS_DIGIT(ch);
164 case SRE_CATEGORY_SPACE:
165 return SRE_IS_SPACE(ch);
166 case SRE_CATEGORY_NOT_SPACE:
167 return !SRE_IS_SPACE(ch);
168 case SRE_CATEGORY_WORD:
169 return SRE_IS_WORD(ch);
170 case SRE_CATEGORY_NOT_WORD:
171 return !SRE_IS_WORD(ch);
172 case SRE_CATEGORY_LINEBREAK:
173 return SRE_IS_LINEBREAK(ch);
174 case SRE_CATEGORY_NOT_LINEBREAK:
175 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000176
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000177 case SRE_CATEGORY_LOC_WORD:
178 return SRE_LOC_IS_WORD(ch);
179 case SRE_CATEGORY_LOC_NOT_WORD:
180 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000182 case SRE_CATEGORY_UNI_DIGIT:
183 return SRE_UNI_IS_DIGIT(ch);
184 case SRE_CATEGORY_UNI_NOT_DIGIT:
185 return !SRE_UNI_IS_DIGIT(ch);
186 case SRE_CATEGORY_UNI_SPACE:
187 return SRE_UNI_IS_SPACE(ch);
188 case SRE_CATEGORY_UNI_NOT_SPACE:
189 return !SRE_UNI_IS_SPACE(ch);
190 case SRE_CATEGORY_UNI_WORD:
191 return SRE_UNI_IS_WORD(ch);
192 case SRE_CATEGORY_UNI_NOT_WORD:
193 return !SRE_UNI_IS_WORD(ch);
194 case SRE_CATEGORY_UNI_LINEBREAK:
195 return SRE_UNI_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
197 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000198 }
199 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000200}
201
202/* helpers */
203
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000204static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000205data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000206{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000207 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000208 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000209 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000210 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000211 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000212}
213
214static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000215data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000217 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000218 minsize = state->data_stack_base+size;
219 cursize = state->data_stack_size;
220 if (cursize < minsize) {
221 void* stack;
222 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300223 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000224 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000226 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000227 return SRE_ERROR_MEMORY;
228 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000229 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000230 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000231 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000232 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000233}
234
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000235/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000236
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300237#define SRE_CHAR Py_UCS1
238#define SIZEOF_SRE_CHAR 1
239#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300240#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000241
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300242/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000243
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300244#define SRE_CHAR Py_UCS2
245#define SIZEOF_SRE_CHAR 2
246#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300247#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000248
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300249/* generate 32-bit unicode version */
250
251#define SRE_CHAR Py_UCS4
252#define SIZEOF_SRE_CHAR 4
253#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300254#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000255
256/* -------------------------------------------------------------------- */
257/* factories and destructors */
258
259/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100260static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600261static PyObject*pattern_scanner(PatternObject*, PyObject*, PyObject* kw);
Guido van Rossumb700df92000-03-31 14:59:30 +0000262
263static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000264sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +0000265{
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100266 return PyLong_FromSize_t(sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +0000267}
268
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000269static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +0000270sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000271{
272 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000273 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000274 return NULL;
275 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000276 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000277 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000278 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundhb389df32000-06-29 12:48:37 +0000279 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000280}
281
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000282LOCAL(void)
283state_reset(SRE_STATE* state)
284{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000285 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000286 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000287
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000288 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000289 state->lastindex = -1;
290
291 state->repeat = NULL;
292
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000293 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000294}
295
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000296static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200297getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300298 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600299 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000300{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000301 /* given a python object, return a data pointer, a length (in
302 characters), and a character size. return NULL if the object
303 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000304
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000305 /* Unicode objects do not support the buffer API. So, get the data
306 directly instead. */
307 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200308 if (PyUnicode_READY(string) == -1)
309 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200310 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200311 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300312 *p_isbytes = 0;
313 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000314 }
315
Victor Stinner0058b862011-09-29 03:27:47 +0200316 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300317 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200318 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300319 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000320 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000321
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300322 *p_length = view->len;
323 *p_charsize = 1;
324 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000325
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300326 if (view->buf == NULL) {
327 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
328 PyBuffer_Release(view);
329 view->buf = NULL;
330 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000331 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300332 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000333}
334
335LOCAL(PyObject*)
336state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000337 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000338{
339 /* prepare state object */
340
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000341 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300342 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000343 void* ptr;
344
345 memset(state, 0, sizeof(SRE_STATE));
346
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300347 state->mark = PyMem_New(void *, pattern->groups * 2);
348 if (!state->mark) {
349 PyErr_NoMemory();
350 goto err;
351 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000352 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000353 state->lastindex = -1;
354
Benjamin Petersone48944b2012-03-07 14:50:25 -0600355 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300356 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000357 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600358 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000359
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300360 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600361 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200362 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600363 goto err;
364 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300365 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600366 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200367 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600368 goto err;
369 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000370
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000371 /* adjust boundaries */
372 if (start < 0)
373 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000374 else if (start > length)
375 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000376
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000377 if (end < 0)
378 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000379 else if (end > length)
380 end = length;
381
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000383 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000384
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000385 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000386
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000387 state->start = (void*) ((char*) ptr + start * state->charsize);
388 state->end = (void*) ((char*) ptr + end * state->charsize);
389
390 Py_INCREF(string);
391 state->string = string;
392 state->pos = start;
393 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000394
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200395 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000396 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200397 state->upper = sre_upper_locale;
398 }
399 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000400 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200401 state->upper = sre_upper_unicode;
402 }
403 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000404 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200405 state->upper = sre_upper;
406 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000407
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000408 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600409 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300410 PyMem_Del(state->mark);
411 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600412 if (state->buffer.buf)
413 PyBuffer_Release(&state->buffer);
414 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000415}
416
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000417LOCAL(void)
418state_fini(SRE_STATE* state)
419{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600420 if (state->buffer.buf)
421 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000422 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000423 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300424 PyMem_Del(state->mark);
425 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000426}
427
/* calculate offset from start of string: byte distance between `member`
   and state->beginning, divided by the state's character size */
#define STATE_OFFSET(state, member) \
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
431
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000432LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300433getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300434 PyObject* string, Py_ssize_t start, Py_ssize_t end)
435{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300436 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300437 if (PyBytes_CheckExact(string) &&
438 start == 0 && end == PyBytes_GET_SIZE(string)) {
439 Py_INCREF(string);
440 return string;
441 }
442 return PyBytes_FromStringAndSize(
443 (const char *)ptr + start, end - start);
444 }
445 else {
446 return PyUnicode_Substring(string, start, end);
447 }
448}
449
450LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000451state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000452{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000453 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000454
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000455 index = (index - 1) * 2;
456
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000457 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000458 if (empty)
459 /* want empty string */
460 i = j = 0;
461 else {
462 Py_INCREF(Py_None);
463 return Py_None;
464 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000465 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000466 i = STATE_OFFSET(state, state->mark[index]);
467 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000468 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000469
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300470 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000471}
472
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000473static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100474pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000475{
476 switch (status) {
477 case SRE_ERROR_RECURSION_LIMIT:
478 PyErr_SetString(
479 PyExc_RuntimeError,
480 "maximum recursion limit exceeded"
481 );
482 break;
483 case SRE_ERROR_MEMORY:
484 PyErr_NoMemory();
485 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000486 case SRE_ERROR_INTERRUPTED:
487 /* An exception has already been raised, so let it fly */
488 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000489 default:
490 /* other error codes indicate compiler/engine bugs */
491 PyErr_SetString(
492 PyExc_RuntimeError,
493 "internal error in regular expression engine"
494 );
495 }
496}
497
Guido van Rossumb700df92000-03-31 14:59:30 +0000498static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000499pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000500{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000501 if (self->weakreflist != NULL)
502 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000503 Py_XDECREF(self->pattern);
504 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000505 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000506 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000507}
508
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300509LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300510sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300511{
512 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300513 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300514 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300515 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300516 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300517 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300518}
519
520LOCAL(Py_ssize_t)
521sre_search(SRE_STATE* state, SRE_CODE* pattern)
522{
523 if (state->charsize == 1)
524 return sre_ucs1_search(state, pattern);
525 if (state->charsize == 2)
526 return sre_ucs2_search(state, pattern);
527 assert(state->charsize == 4);
528 return sre_ucs4_search(state, pattern);
529}
530
Larry Hastings16c51912014-01-07 11:53:01 -0800531static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200532fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
533{
534 if (string2 != NULL) {
535 if (string != NULL) {
536 PyErr_Format(PyExc_TypeError,
537 "Argument given by name ('%s') and position (1)",
538 oldname);
539 return NULL;
540 }
541 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
542 "The '%s' keyword parameter name is deprecated. "
543 "Use 'string' instead.", oldname) < 0)
544 return NULL;
545 return string2;
546 }
547 if (string == NULL) {
548 PyErr_SetString(PyExc_TypeError,
549 "Required argument 'string' (pos 1) not found");
550 return NULL;
551 }
552 return string;
553}
Larry Hastings16c51912014-01-07 11:53:01 -0800554
555static PyObject *
Larry Hastings5c661892014-01-24 06:17:25 -0800556pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
Larry Hastings16c51912014-01-07 11:53:01 -0800557{
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200558 static char *_keywords[] = {"string", "pos", "endpos", "pattern", NULL};
559 PyObject *string = NULL;
Larry Hastings16c51912014-01-07 11:53:01 -0800560 Py_ssize_t pos = 0;
561 Py_ssize_t endpos = PY_SSIZE_T_MAX;
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200562 PyObject *pattern = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000563 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100564 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300565 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000566
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200567 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
568 "|Onn$O:match", _keywords,
569 &string, &pos, &endpos, &pattern))
570 return NULL;
571 string = fix_string_param(string, pattern, "pattern");
572 if (!string)
573 return NULL;
574 string = state_init(&state, (PatternObject *)self, string, pos, endpos);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 if (!string)
576 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000577
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000578 state.ptr = state.start;
579
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000580 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
581
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300582 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000583
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000584 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300585 if (PyErr_Occurred()) {
586 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000587 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300588 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000589
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300590 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000591 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300592 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000593}
594
595static PyObject*
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200596pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
597{
598 SRE_STATE state;
599 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300600 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200601
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200602 PyObject *string = NULL, *string2 = NULL;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200603 Py_ssize_t start = 0;
604 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200605 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200606 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:fullmatch", kwlist,
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200607 &string, &start, &end, &string2))
608 return NULL;
609
610 string = fix_string_param(string, string2, "pattern");
611 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200612 return NULL;
613
614 string = state_init(&state, self, string, start, end);
615 if (!string)
616 return NULL;
617
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200618 state.ptr = state.start;
619
620 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
621
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300622 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200623
624 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300625 if (PyErr_Occurred()) {
626 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200627 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300628 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200629
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300630 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200631 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300632 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200633}
634
635static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000636pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000637{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000638 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100639 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300640 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000641
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200642 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000643 Py_ssize_t start = 0;
644 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200645 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
646 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:search", kwlist,
647 &string, &start, &end, &string2))
648 return NULL;
649
650 string = fix_string_param(string, string2, "pattern");
651 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000652 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000653
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000654 string = state_init(&state, self, string, start, end);
655 if (!string)
656 return NULL;
657
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000658 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
659
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300660 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000661
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000662 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
663
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300664 if (PyErr_Occurred()) {
665 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000666 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300667 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000668
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300669 match = pattern_new_match(self, &state, status);
670 state_fini(&state);
671 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000672}
673
674static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000675call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000676{
677 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000678 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000679 PyObject* func;
680 PyObject* result;
681
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000682 if (!args)
683 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000684 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000685 if (!name)
686 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000687 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000688 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000689 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000690 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000691 func = PyObject_GetAttrString(mod, function);
692 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000693 if (!func)
694 return NULL;
695 result = PyObject_CallObject(func, args);
696 Py_DECREF(func);
697 Py_DECREF(args);
698 return result;
699}
700
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
 *
 * On success the old reference held in *object is released, the new one is
 * stored in its place and 1 is returned.  On failure *object is left
 * untouched and 0 is returned.
 */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* duplicate = call("copy", "deepcopy",
                               PyTuple_Pack(2, *object, memo));

    if (duplicate != NULL) {
        Py_DECREF(*object);
        *object = duplicate;
        return 1; /* success */
    }

    return 0;
}
#endif
720
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000722pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000723{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000724 SRE_STATE state;
725 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100726 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000727 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000728
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200729 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000730 Py_ssize_t start = 0;
731 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200732 static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
733 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:findall", kwlist,
734 &string, &start, &end, &string2))
735 return NULL;
736
737 string = fix_string_param(string, string2, "source");
738 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000740
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000741 string = state_init(&state, self, string, start, end);
742 if (!string)
743 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000744
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000746 if (!list) {
747 state_fini(&state);
748 return NULL;
749 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000750
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000751 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000752
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000753 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000754
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000755 state_reset(&state);
756
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 state.ptr = state.start;
758
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300759 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300760 if (PyErr_Occurred())
761 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000762
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000763 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000764 if (status == 0)
765 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000766 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000767 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000768 }
Tim Peters3d563502006-01-21 02:47:53 +0000769
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000770 /* don't bother to build a match object */
771 switch (self->groups) {
772 case 0:
773 b = STATE_OFFSET(&state, state.start);
774 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300775 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300776 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000777 if (!item)
778 goto error;
779 break;
780 case 1:
781 item = state_getslice(&state, 1, string, 1);
782 if (!item)
783 goto error;
784 break;
785 default:
786 item = PyTuple_New(self->groups);
787 if (!item)
788 goto error;
789 for (i = 0; i < self->groups; i++) {
790 PyObject* o = state_getslice(&state, i+1, string, 1);
791 if (!o) {
792 Py_DECREF(item);
793 goto error;
794 }
795 PyTuple_SET_ITEM(item, i, o);
796 }
797 break;
798 }
799
800 status = PyList_Append(list, item);
801 Py_DECREF(item);
802 if (status < 0)
803 goto error;
804
805 if (state.ptr == state.start)
806 state.start = (void*) ((char*) state.ptr + state.charsize);
807 else
808 state.start = state.ptr;
809
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000810 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000811
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000812 state_fini(&state);
813 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000814
815error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000816 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000817 state_fini(&state);
818 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000819
Guido van Rossumb700df92000-03-31 14:59:30 +0000820}
821
Fredrik Lundh703ce812001-10-24 22:16:30 +0000822static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600823pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
Fredrik Lundh703ce812001-10-24 22:16:30 +0000824{
825 PyObject* scanner;
826 PyObject* search;
827 PyObject* iterator;
828
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600829 scanner = pattern_scanner(pattern, args, kw);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000830 if (!scanner)
831 return NULL;
832
833 search = PyObject_GetAttrString(scanner, "search");
834 Py_DECREF(scanner);
835 if (!search)
836 return NULL;
837
838 iterator = PyCallIter_New(search, Py_None);
839 Py_DECREF(search);
840
841 return iterator;
842}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000843
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000844static PyObject*
845pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
846{
847 SRE_STATE state;
848 PyObject* list;
849 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100850 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000851 Py_ssize_t n;
852 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000853 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000854
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200855 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000856 Py_ssize_t maxsplit = 0;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200857 static char* kwlist[] = { "string", "maxsplit", "source", NULL };
858 if (!PyArg_ParseTupleAndKeywords(args, kw, "|On$O:split", kwlist,
859 &string, &maxsplit, &string2))
860 return NULL;
861
862 string = fix_string_param(string, string2, "source");
863 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000864 return NULL;
865
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200866 assert(self->codesize != 0);
867 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
868 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
869 PyErr_SetString(PyExc_ValueError,
870 "split() requires a non-empty pattern match.");
871 return NULL;
872 }
873 if (PyErr_WarnEx(PyExc_FutureWarning,
874 "split() requires a non-empty pattern match.",
875 1) < 0)
876 return NULL;
877 }
878
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000879 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000880 if (!string)
881 return NULL;
882
883 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000884 if (!list) {
885 state_fini(&state);
886 return NULL;
887 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000888
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000889 n = 0;
890 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000891
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000892 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000893
894 state_reset(&state);
895
896 state.ptr = state.start;
897
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300898 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300899 if (PyErr_Occurred())
900 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000901
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000902 if (status <= 0) {
903 if (status == 0)
904 break;
905 pattern_error(status);
906 goto error;
907 }
Tim Peters3d563502006-01-21 02:47:53 +0000908
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000909 if (state.start == state.ptr) {
910 if (last == state.end)
911 break;
912 /* skip one character */
913 state.start = (void*) ((char*) state.ptr + state.charsize);
914 continue;
915 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000916
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000917 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300918 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000919 string, STATE_OFFSET(&state, last),
920 STATE_OFFSET(&state, state.start)
921 );
922 if (!item)
923 goto error;
924 status = PyList_Append(list, item);
925 Py_DECREF(item);
926 if (status < 0)
927 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000928
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000929 /* add groups (if any) */
930 for (i = 0; i < self->groups; i++) {
931 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000932 if (!item)
933 goto error;
934 status = PyList_Append(list, item);
935 Py_DECREF(item);
936 if (status < 0)
937 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000938 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000939
940 n = n + 1;
941
942 last = state.start = state.ptr;
943
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000944 }
945
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000946 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300947 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000948 string, STATE_OFFSET(&state, last), state.endpos
949 );
950 if (!item)
951 goto error;
952 status = PyList_Append(list, item);
953 Py_DECREF(item);
954 if (status < 0)
955 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000956
957 state_fini(&state);
958 return list;
959
960error:
961 Py_DECREF(list);
962 state_fini(&state);
963 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000964
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000965}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000966
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000967static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000968pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000969 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000970{
971 SRE_STATE state;
972 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300973 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000974 PyObject* item;
975 PyObject* filter;
976 PyObject* args;
977 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000978 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100979 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000980 Py_ssize_t n;
981 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300982 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000983 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600984 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000985
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000986 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000987 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000988 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000989 Py_INCREF(filter);
990 filter_is_callable = 1;
991 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000992 /* if not callable, check if it's a literal string */
993 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600994 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300995 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200996 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000997 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300998 if (charsize == 1)
999 literal = memchr(ptr, '\\', n) == NULL;
1000 else
1001 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001002 } else {
1003 PyErr_Clear();
1004 literal = 0;
1005 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001006 if (view.buf)
1007 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001008 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001009 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001010 Py_INCREF(filter);
1011 filter_is_callable = 0;
1012 } else {
1013 /* not a literal; hand it over to the template compiler */
1014 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001015 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001016 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001017 );
1018 if (!filter)
1019 return NULL;
1020 filter_is_callable = PyCallable_Check(filter);
1021 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001022 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001023
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001024 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001025 if (!string) {
1026 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001027 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001028 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001029
1030 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001031 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001032 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001033 state_fini(&state);
1034 return NULL;
1035 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001036
1037 n = i = 0;
1038
1039 while (!count || n < count) {
1040
1041 state_reset(&state);
1042
1043 state.ptr = state.start;
1044
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001045 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001046 if (PyErr_Occurred())
1047 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001048
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001049 if (status <= 0) {
1050 if (status == 0)
1051 break;
1052 pattern_error(status);
1053 goto error;
1054 }
Tim Peters3d563502006-01-21 02:47:53 +00001055
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001056 b = STATE_OFFSET(&state, state.start);
1057 e = STATE_OFFSET(&state, state.ptr);
1058
1059 if (i < b) {
1060 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001061 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001062 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001063 if (!item)
1064 goto error;
1065 status = PyList_Append(list, item);
1066 Py_DECREF(item);
1067 if (status < 0)
1068 goto error;
1069
1070 } else if (i == b && i == e && n > 0)
1071 /* ignore empty match on latest position */
1072 goto next;
1073
1074 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001075 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001076 match = pattern_new_match(self, &state, 1);
1077 if (!match)
1078 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001079 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001080 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001081 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001082 goto error;
1083 }
1084 item = PyObject_CallObject(filter, args);
1085 Py_DECREF(args);
1086 Py_DECREF(match);
1087 if (!item)
1088 goto error;
1089 } else {
1090 /* filter is literal string */
1091 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001092 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001093 }
1094
1095 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001096 if (item != Py_None) {
1097 status = PyList_Append(list, item);
1098 Py_DECREF(item);
1099 if (status < 0)
1100 goto error;
1101 }
Tim Peters3d563502006-01-21 02:47:53 +00001102
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001103 i = e;
1104 n = n + 1;
1105
1106next:
1107 /* move on */
1108 if (state.ptr == state.start)
1109 state.start = (void*) ((char*) state.ptr + state.charsize);
1110 else
1111 state.start = state.ptr;
1112
1113 }
1114
1115 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001116 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001117 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001118 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001119 if (!item)
1120 goto error;
1121 status = PyList_Append(list, item);
1122 Py_DECREF(item);
1123 if (status < 0)
1124 goto error;
1125 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001126
1127 state_fini(&state);
1128
Guido van Rossum4e173842001-12-07 04:25:10 +00001129 Py_DECREF(filter);
1130
Fredrik Lundhdac58492001-10-21 21:48:30 +00001131 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001132 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001133 if (!joiner) {
1134 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001135 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001136 }
1137 if (PyList_GET_SIZE(list) == 0) {
1138 Py_DECREF(list);
1139 item = joiner;
1140 }
1141 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001142 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001143 item = _PyBytes_Join(joiner, list);
1144 else
1145 item = PyUnicode_Join(joiner, list);
1146 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001147 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001148 if (!item)
1149 return NULL;
1150 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001151
1152 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001153 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001154
1155 return item;
1156
1157error:
1158 Py_DECREF(list);
1159 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001160 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001161 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001162
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001163}
1164
1165static PyObject*
1166pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
1167{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001168 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001169 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001170 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001171 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001172 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001173 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001174 return NULL;
1175
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001176 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001177}
1178
1179static PyObject*
1180pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
1181{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001182 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001183 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001184 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001185 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001186 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001187 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001188 return NULL;
1189
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001190 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001191}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001192
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001193static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001194pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001195{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001196#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001197 PatternObject* copy;
1198 int offset;
1199
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001200 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1201 if (!copy)
1202 return NULL;
1203
1204 offset = offsetof(PatternObject, groups);
1205
1206 Py_XINCREF(self->groupindex);
1207 Py_XINCREF(self->indexgroup);
1208 Py_XINCREF(self->pattern);
1209
1210 memcpy((char*) copy + offset, (char*) self + offset,
1211 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001212 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001213
1214 return (PyObject*) copy;
1215#else
1216 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1217 return NULL;
1218#endif
1219}
1220
1221static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001222pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001223{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001224#ifdef USE_BUILTIN_COPY
1225 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001226
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001227 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001228 if (!copy)
1229 return NULL;
1230
1231 if (!deepcopy(&copy->groupindex, memo) ||
1232 !deepcopy(&copy->indexgroup, memo) ||
1233 !deepcopy(&copy->pattern, memo)) {
1234 Py_DECREF(copy);
1235 return NULL;
1236 }
1237
1238#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001239 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1240 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001241#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001242}
1243
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001244static PyObject *
1245pattern_repr(PatternObject *obj)
1246{
1247 static const struct {
1248 const char *name;
1249 int value;
1250 } flag_names[] = {
1251 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1252 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1253 {"re.LOCALE", SRE_FLAG_LOCALE},
1254 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1255 {"re.DOTALL", SRE_FLAG_DOTALL},
1256 {"re.UNICODE", SRE_FLAG_UNICODE},
1257 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1258 {"re.DEBUG", SRE_FLAG_DEBUG},
1259 {"re.ASCII", SRE_FLAG_ASCII},
1260 };
1261 PyObject *result = NULL;
1262 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001263 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001264 int flags = obj->flags;
1265
1266 /* Omit re.UNICODE for valid string patterns. */
1267 if (obj->isbytes == 0 &&
1268 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1269 SRE_FLAG_UNICODE)
1270 flags &= ~SRE_FLAG_UNICODE;
1271
1272 flag_items = PyList_New(0);
1273 if (!flag_items)
1274 return NULL;
1275
1276 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1277 if (flags & flag_names[i].value) {
1278 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1279 if (!item)
1280 goto done;
1281
1282 if (PyList_Append(flag_items, item) < 0) {
1283 Py_DECREF(item);
1284 goto done;
1285 }
1286 Py_DECREF(item);
1287 flags &= ~flag_names[i].value;
1288 }
1289 }
1290 if (flags) {
1291 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1292 if (!item)
1293 goto done;
1294
1295 if (PyList_Append(flag_items, item) < 0) {
1296 Py_DECREF(item);
1297 goto done;
1298 }
1299 Py_DECREF(item);
1300 }
1301
1302 if (PyList_Size(flag_items) > 0) {
1303 PyObject *flags_result;
1304 PyObject *sep = PyUnicode_FromString("|");
1305 if (!sep)
1306 goto done;
1307 flags_result = PyUnicode_Join(sep, flag_items);
1308 Py_DECREF(sep);
1309 if (!flags_result)
1310 goto done;
1311 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1312 obj->pattern, flags_result);
1313 Py_DECREF(flags_result);
1314 }
1315 else {
1316 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1317 }
1318
1319done:
1320 Py_DECREF(flag_items);
1321 return result;
1322}
1323
Raymond Hettinger94478742004-09-24 04:31:19 +00001324PyDoc_STRVAR(pattern_match_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001325"match(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001326 Matches zero or more characters at the beginning of the string");
1327
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001328PyDoc_STRVAR(pattern_fullmatch_doc,
1329"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
1330 Matches against all of the string");
1331
Raymond Hettinger94478742004-09-24 04:31:19 +00001332PyDoc_STRVAR(pattern_search_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001333"search(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001334 Scan through string looking for a match, and return a corresponding\n\
Andrew Svetlov0b64c142012-12-25 18:48:54 +02001335 match object instance. Return None if no position in the string matches.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001336
1337PyDoc_STRVAR(pattern_split_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001338"split(string[, maxsplit = 0]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001339 Split string by the occurrences of pattern.");
1340
1341PyDoc_STRVAR(pattern_findall_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001342"findall(string[, pos[, endpos]]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001343 Return a list of all non-overlapping matches of pattern in string.");
1344
1345PyDoc_STRVAR(pattern_finditer_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001346"finditer(string[, pos[, endpos]]) -> iterator.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001347 Return an iterator over all non-overlapping matches for the \n\
1348 RE pattern in string. For each match, the iterator returns a\n\
1349 match object.");
1350
1351PyDoc_STRVAR(pattern_sub_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001352"sub(repl, string[, count = 0]) -> newstring.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001353 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00001354 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001355
1356PyDoc_STRVAR(pattern_subn_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001357"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001358 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
1359 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00001360 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001361
1362PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1363
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001364static PyMethodDef pattern_methods[] = {
Tim Peters3d563502006-01-21 02:47:53 +00001365 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001366 pattern_match_doc},
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001367 {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
1368 pattern_fullmatch_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001369 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001370 pattern_search_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001371 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001372 pattern_sub_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001373 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001374 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001375 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001376 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001377 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001378 pattern_findall_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001379 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001380 pattern_finditer_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001381 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001382 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
1383 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001384 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001385};
1386
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00001387#define PAT_OFF(x) offsetof(PatternObject, x)
1388static PyMemberDef pattern_members[] = {
1389 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
1390 {"flags", T_INT, PAT_OFF(flags), READONLY},
1391 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
1392 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
1393 {NULL} /* Sentinel */
1394};
Guido van Rossumb700df92000-03-31 14:59:30 +00001395
Neal Norwitz57c179c2006-03-22 07:18:02 +00001396static PyTypeObject Pattern_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001397 PyVarObject_HEAD_INIT(NULL, 0)
1398 "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001399 sizeof(PatternObject), sizeof(SRE_CODE),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001400 (destructor)pattern_dealloc, /* tp_dealloc */
1401 0, /* tp_print */
1402 0, /* tp_getattr */
1403 0, /* tp_setattr */
1404 0, /* tp_reserved */
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001405 (reprfunc)pattern_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001406 0, /* tp_as_number */
1407 0, /* tp_as_sequence */
1408 0, /* tp_as_mapping */
1409 0, /* tp_hash */
1410 0, /* tp_call */
1411 0, /* tp_str */
1412 0, /* tp_getattro */
1413 0, /* tp_setattro */
1414 0, /* tp_as_buffer */
1415 Py_TPFLAGS_DEFAULT, /* tp_flags */
1416 pattern_doc, /* tp_doc */
1417 0, /* tp_traverse */
1418 0, /* tp_clear */
1419 0, /* tp_richcompare */
1420 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
1421 0, /* tp_iter */
1422 0, /* tp_iternext */
1423 pattern_methods, /* tp_methods */
1424 pattern_members, /* tp_members */
Guido van Rossumb700df92000-03-31 14:59:30 +00001425};
1426
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001427static int _validate(PatternObject *self); /* Forward */
1428
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001429static PyObject *
1430_compile(PyObject* self_, PyObject* args)
1431{
1432 /* "compile" pattern descriptor to pattern object */
1433
1434 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001435 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001436
1437 PyObject* pattern;
1438 int flags = 0;
1439 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001440 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001441 PyObject* groupindex = NULL;
1442 PyObject* indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001443
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001444 if (!PyArg_ParseTuple(args, "OiO!nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001445 &PyList_Type, &code, &groups,
1446 &groupindex, &indexgroup))
1447 return NULL;
1448
1449 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001450 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001451 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1452 if (!self)
1453 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001454 self->weakreflist = NULL;
1455 self->pattern = NULL;
1456 self->groupindex = NULL;
1457 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001458
1459 self->codesize = n;
1460
1461 for (i = 0; i < n; i++) {
1462 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001463 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001464 self->code[i] = (SRE_CODE) value;
1465 if ((unsigned long) self->code[i] != value) {
1466 PyErr_SetString(PyExc_OverflowError,
1467 "regular expression code size limit exceeded");
1468 break;
1469 }
1470 }
1471
1472 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001473 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001474 return NULL;
1475 }
1476
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001477 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001478 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001479 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001480 else {
1481 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001482 int charsize;
1483 Py_buffer view;
1484 view.buf = NULL;
1485 if (!getstring(pattern, &p_length, &self->isbytes,
1486 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001487 Py_DECREF(self);
1488 return NULL;
1489 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001490 if (view.buf)
1491 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001492 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001493
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001494 Py_INCREF(pattern);
1495 self->pattern = pattern;
1496
1497 self->flags = flags;
1498
1499 self->groups = groups;
1500
1501 Py_XINCREF(groupindex);
1502 self->groupindex = groupindex;
1503
1504 Py_XINCREF(indexgroup);
1505 self->indexgroup = indexgroup;
1506
1507 self->weakreflist = NULL;
1508
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001509 if (!_validate(self)) {
1510 Py_DECREF(self);
1511 return NULL;
1512 }
1513
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001514 return (PyObject*) self;
1515}
1516
Guido van Rossumb700df92000-03-31 14:59:30 +00001517/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001518/* Code validation */
1519
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1541
/* Define VVERBOSE (instead of undefining it) to trace the validator */
#undef VVERBOSE

/* VTRACE takes a parenthesized printf argument list: VTRACE(("fmt", a, b)) */
#ifdef VVERBOSE
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Give up: make the enclosing validator function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* The fetch macros below expect the enclosing function to provide the
   variables code and end, plus op, arg or skip as appropriate.  Each one
   FAILs if code has already reached end. */

/* Fetch the next opcode into op and advance code */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)

/* Fetch the next argument word into arg and advance code */
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)

/* Fetch a skip count into skip and advance code.  FAILs if skip minus adj
   exceeds the number of words between the skip word and end. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)

/* The common case: a skip count with no adjustment */
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001582
1583static int
1584_validate_charset(SRE_CODE *code, SRE_CODE *end)
1585{
1586 /* Some variables are manipulated by the macros above */
1587 SRE_CODE op;
1588 SRE_CODE arg;
1589 SRE_CODE offset;
1590 int i;
1591
1592 while (code < end) {
1593 GET_OP;
1594 switch (op) {
1595
1596 case SRE_OP_NEGATE:
1597 break;
1598
1599 case SRE_OP_LITERAL:
1600 GET_ARG;
1601 break;
1602
1603 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001604 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001605 GET_ARG;
1606 GET_ARG;
1607 break;
1608
1609 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001610 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001611 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001612 FAIL;
1613 code += offset;
1614 break;
1615
1616 case SRE_OP_BIGCHARSET:
1617 GET_ARG; /* Number of blocks */
1618 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001619 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001620 FAIL;
1621 /* Make sure that each byte points to a valid block */
1622 for (i = 0; i < 256; i++) {
1623 if (((unsigned char *)code)[i] >= arg)
1624 FAIL;
1625 }
1626 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001627 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001628 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001629 FAIL;
1630 code += offset;
1631 break;
1632
1633 case SRE_OP_CATEGORY:
1634 GET_ARG;
1635 switch (arg) {
1636 case SRE_CATEGORY_DIGIT:
1637 case SRE_CATEGORY_NOT_DIGIT:
1638 case SRE_CATEGORY_SPACE:
1639 case SRE_CATEGORY_NOT_SPACE:
1640 case SRE_CATEGORY_WORD:
1641 case SRE_CATEGORY_NOT_WORD:
1642 case SRE_CATEGORY_LINEBREAK:
1643 case SRE_CATEGORY_NOT_LINEBREAK:
1644 case SRE_CATEGORY_LOC_WORD:
1645 case SRE_CATEGORY_LOC_NOT_WORD:
1646 case SRE_CATEGORY_UNI_DIGIT:
1647 case SRE_CATEGORY_UNI_NOT_DIGIT:
1648 case SRE_CATEGORY_UNI_SPACE:
1649 case SRE_CATEGORY_UNI_NOT_SPACE:
1650 case SRE_CATEGORY_UNI_WORD:
1651 case SRE_CATEGORY_UNI_NOT_WORD:
1652 case SRE_CATEGORY_UNI_LINEBREAK:
1653 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1654 break;
1655 default:
1656 FAIL;
1657 }
1658 break;
1659
1660 default:
1661 FAIL;
1662
1663 }
1664 }
1665
1666 return 1;
1667}
1668
1669static int
1670_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1671{
1672 /* Some variables are manipulated by the macros above */
1673 SRE_CODE op;
1674 SRE_CODE arg;
1675 SRE_CODE skip;
1676
1677 VTRACE(("code=%p, end=%p\n", code, end));
1678
1679 if (code > end)
1680 FAIL;
1681
1682 while (code < end) {
1683 GET_OP;
1684 switch (op) {
1685
1686 case SRE_OP_MARK:
1687 /* We don't check whether marks are properly nested; the
1688 sre_match() code is robust even if they don't, and the worst
1689 you can get is nonsensical match results. */
1690 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001691 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001692 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1693 FAIL;
1694 }
1695 break;
1696
1697 case SRE_OP_LITERAL:
1698 case SRE_OP_NOT_LITERAL:
1699 case SRE_OP_LITERAL_IGNORE:
1700 case SRE_OP_NOT_LITERAL_IGNORE:
1701 GET_ARG;
1702 /* The arg is just a character, nothing to check */
1703 break;
1704
1705 case SRE_OP_SUCCESS:
1706 case SRE_OP_FAILURE:
1707 /* Nothing to check; these normally end the matching process */
1708 break;
1709
1710 case SRE_OP_AT:
1711 GET_ARG;
1712 switch (arg) {
1713 case SRE_AT_BEGINNING:
1714 case SRE_AT_BEGINNING_STRING:
1715 case SRE_AT_BEGINNING_LINE:
1716 case SRE_AT_END:
1717 case SRE_AT_END_LINE:
1718 case SRE_AT_END_STRING:
1719 case SRE_AT_BOUNDARY:
1720 case SRE_AT_NON_BOUNDARY:
1721 case SRE_AT_LOC_BOUNDARY:
1722 case SRE_AT_LOC_NON_BOUNDARY:
1723 case SRE_AT_UNI_BOUNDARY:
1724 case SRE_AT_UNI_NON_BOUNDARY:
1725 break;
1726 default:
1727 FAIL;
1728 }
1729 break;
1730
1731 case SRE_OP_ANY:
1732 case SRE_OP_ANY_ALL:
1733 /* These have no operands */
1734 break;
1735
1736 case SRE_OP_IN:
1737 case SRE_OP_IN_IGNORE:
1738 GET_SKIP;
1739 /* Stop 1 before the end; we check the FAILURE below */
1740 if (!_validate_charset(code, code+skip-2))
1741 FAIL;
1742 if (code[skip-2] != SRE_OP_FAILURE)
1743 FAIL;
1744 code += skip-1;
1745 break;
1746
1747 case SRE_OP_INFO:
1748 {
1749 /* A minimal info field is
1750 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1751 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1752 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001753 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001754 SRE_CODE *newcode;
1755 GET_SKIP;
1756 newcode = code+skip-1;
1757 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001758 GET_ARG;
1759 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001760 /* Check that only valid flags are present */
1761 if ((flags & ~(SRE_INFO_PREFIX |
1762 SRE_INFO_LITERAL |
1763 SRE_INFO_CHARSET)) != 0)
1764 FAIL;
1765 /* PREFIX and CHARSET are mutually exclusive */
1766 if ((flags & SRE_INFO_PREFIX) &&
1767 (flags & SRE_INFO_CHARSET))
1768 FAIL;
1769 /* LITERAL implies PREFIX */
1770 if ((flags & SRE_INFO_LITERAL) &&
1771 !(flags & SRE_INFO_PREFIX))
1772 FAIL;
1773 /* Validate the prefix */
1774 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001775 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001776 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001777 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001778 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001779 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001780 FAIL;
1781 code += prefix_len;
1782 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001783 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001784 FAIL;
1785 /* Each overlap value should be < prefix_len */
1786 for (i = 0; i < prefix_len; i++) {
1787 if (code[i] >= prefix_len)
1788 FAIL;
1789 }
1790 code += prefix_len;
1791 }
1792 /* Validate the charset */
1793 if (flags & SRE_INFO_CHARSET) {
1794 if (!_validate_charset(code, newcode-1))
1795 FAIL;
1796 if (newcode[-1] != SRE_OP_FAILURE)
1797 FAIL;
1798 code = newcode;
1799 }
1800 else if (code != newcode) {
1801 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1802 FAIL;
1803 }
1804 }
1805 break;
1806
1807 case SRE_OP_BRANCH:
1808 {
1809 SRE_CODE *target = NULL;
1810 for (;;) {
1811 GET_SKIP;
1812 if (skip == 0)
1813 break;
1814 /* Stop 2 before the end; we check the JUMP below */
1815 if (!_validate_inner(code, code+skip-3, groups))
1816 FAIL;
1817 code += skip-3;
1818 /* Check that it ends with a JUMP, and that each JUMP
1819 has the same target */
1820 GET_OP;
1821 if (op != SRE_OP_JUMP)
1822 FAIL;
1823 GET_SKIP;
1824 if (target == NULL)
1825 target = code+skip-1;
1826 else if (code+skip-1 != target)
1827 FAIL;
1828 }
1829 }
1830 break;
1831
1832 case SRE_OP_REPEAT_ONE:
1833 case SRE_OP_MIN_REPEAT_ONE:
1834 {
1835 SRE_CODE min, max;
1836 GET_SKIP;
1837 GET_ARG; min = arg;
1838 GET_ARG; max = arg;
1839 if (min > max)
1840 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001841 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001842 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001843 if (!_validate_inner(code, code+skip-4, groups))
1844 FAIL;
1845 code += skip-4;
1846 GET_OP;
1847 if (op != SRE_OP_SUCCESS)
1848 FAIL;
1849 }
1850 break;
1851
1852 case SRE_OP_REPEAT:
1853 {
1854 SRE_CODE min, max;
1855 GET_SKIP;
1856 GET_ARG; min = arg;
1857 GET_ARG; max = arg;
1858 if (min > max)
1859 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001860 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001861 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001862 if (!_validate_inner(code, code+skip-3, groups))
1863 FAIL;
1864 code += skip-3;
1865 GET_OP;
1866 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1867 FAIL;
1868 }
1869 break;
1870
1871 case SRE_OP_GROUPREF:
1872 case SRE_OP_GROUPREF_IGNORE:
1873 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001874 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001875 FAIL;
1876 break;
1877
1878 case SRE_OP_GROUPREF_EXISTS:
1879 /* The regex syntax for this is: '(?(group)then|else)', where
1880 'group' is either an integer group number or a group name,
1881 'then' and 'else' are sub-regexes, and 'else' is optional. */
1882 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001883 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001884 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001885 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001886 code--; /* The skip is relative to the first arg! */
1887 /* There are two possibilities here: if there is both a 'then'
1888 part and an 'else' part, the generated code looks like:
1889
1890 GROUPREF_EXISTS
1891 <group>
1892 <skipyes>
1893 ...then part...
1894 JUMP
1895 <skipno>
1896 (<skipyes> jumps here)
1897 ...else part...
1898 (<skipno> jumps here)
1899
1900 If there is only a 'then' part, it looks like:
1901
1902 GROUPREF_EXISTS
1903 <group>
1904 <skip>
1905 ...then part...
1906 (<skip> jumps here)
1907
1908 There is no direct way to decide which it is, and we don't want
1909 to allow arbitrary jumps anywhere in the code; so we just look
1910 for a JUMP opcode preceding our skip target.
1911 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001912 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001913 code[skip-3] == SRE_OP_JUMP)
1914 {
1915 VTRACE(("both then and else parts present\n"));
1916 if (!_validate_inner(code+1, code+skip-3, groups))
1917 FAIL;
1918 code += skip-2; /* Position after JUMP, at <skipno> */
1919 GET_SKIP;
1920 if (!_validate_inner(code, code+skip-1, groups))
1921 FAIL;
1922 code += skip-1;
1923 }
1924 else {
1925 VTRACE(("only a then part present\n"));
1926 if (!_validate_inner(code+1, code+skip-1, groups))
1927 FAIL;
1928 code += skip-1;
1929 }
1930 break;
1931
1932 case SRE_OP_ASSERT:
1933 case SRE_OP_ASSERT_NOT:
1934 GET_SKIP;
1935 GET_ARG; /* 0 for lookahead, width for lookbehind */
1936 code--; /* Back up over arg to simplify math below */
1937 if (arg & 0x80000000)
1938 FAIL; /* Width too large */
1939 /* Stop 1 before the end; we check the SUCCESS below */
1940 if (!_validate_inner(code+1, code+skip-2, groups))
1941 FAIL;
1942 code += skip-2;
1943 GET_OP;
1944 if (op != SRE_OP_SUCCESS)
1945 FAIL;
1946 break;
1947
1948 default:
1949 FAIL;
1950
1951 }
1952 }
1953
1954 VTRACE(("okay\n"));
1955 return 1;
1956}
1957
1958static int
1959_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1960{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001961 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1962 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001963 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001964 return _validate_inner(code, end-1, groups);
1965}
1966
1967static int
1968_validate(PatternObject *self)
1969{
1970 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1971 {
1972 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1973 return 0;
1974 }
1975 else
1976 VTRACE(("Success!\n"));
1977 return 1;
1978}
1979
1980/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001981/* match methods */
1982
1983static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001984match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001985{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001986 Py_XDECREF(self->regs);
1987 Py_XDECREF(self->string);
1988 Py_DECREF(self->pattern);
1989 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001990}
1991
1992static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001993match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001994{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001995 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001996 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001997 Py_buffer view;
1998 PyObject *result;
1999 void* ptr;
2000
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002001 if (index < 0 || index >= self->groups) {
2002 /* raise IndexError if we were given a bad group number */
2003 PyErr_SetString(
2004 PyExc_IndexError,
2005 "no such group"
2006 );
2007 return NULL;
2008 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002009
Fredrik Lundh6f013982000-07-03 18:44:21 +00002010 index *= 2;
2011
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002012 if (self->string == Py_None || self->mark[index] < 0) {
2013 /* return default value if the string or group is undefined */
2014 Py_INCREF(def);
2015 return def;
2016 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002017
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002018 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03002019 if (ptr == NULL)
2020 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002021 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03002022 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002023 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002024 PyBuffer_Release(&view);
2025 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002026}
2027
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002028static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002029match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002030{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002031 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002032
Guido van Rossumddefaf32007-01-14 03:31:43 +00002033 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002034 /* Default value */
2035 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002036
Christian Heimes217cfd12007-12-02 14:31:20 +00002037 if (PyLong_Check(index))
2038 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002039
Fredrik Lundh6f013982000-07-03 18:44:21 +00002040 i = -1;
2041
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002042 if (self->pattern->groupindex) {
2043 index = PyObject_GetItem(self->pattern->groupindex, index);
2044 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002045 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002046 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002047 Py_DECREF(index);
2048 } else
2049 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002050 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002051
2052 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002053}
2054
2055static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002056match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002057{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002058 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002059}
2060
2061static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002062match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002063{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002064 /* delegate to Python code */
2065 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002066 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002067 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002068 );
2069}
2070
2071static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002072match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002073{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002074 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002075 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002076
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002077 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002078
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002079 switch (size) {
2080 case 0:
2081 result = match_getslice(self, Py_False, Py_None);
2082 break;
2083 case 1:
2084 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2085 break;
2086 default:
2087 /* fetch multiple items */
2088 result = PyTuple_New(size);
2089 if (!result)
2090 return NULL;
2091 for (i = 0; i < size; i++) {
2092 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002093 self, PyTuple_GET_ITEM(args, i), Py_None
2094 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002095 if (!item) {
2096 Py_DECREF(result);
2097 return NULL;
2098 }
2099 PyTuple_SET_ITEM(result, i, item);
2100 }
2101 break;
2102 }
2103 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002104}
2105
2106static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002107match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002108{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002109 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002110 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002111
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002112 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002113 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00002114 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002115 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002116
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002117 result = PyTuple_New(self->groups-1);
2118 if (!result)
2119 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002120
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002121 for (index = 1; index < self->groups; index++) {
2122 PyObject* item;
2123 item = match_getslice_by_index(self, index, def);
2124 if (!item) {
2125 Py_DECREF(result);
2126 return NULL;
2127 }
2128 PyTuple_SET_ITEM(result, index-1, item);
2129 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002130
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002131 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002132}
2133
2134static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002135match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002136{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002137 PyObject* result;
2138 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002139 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002140
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002141 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002142 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002143 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002144 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002145
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002146 result = PyDict_New();
2147 if (!result || !self->pattern->groupindex)
2148 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002149
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002150 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002151 if (!keys)
2152 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002153
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002154 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002155 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002156 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002157 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002158 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002159 if (!key)
2160 goto failed;
2161 value = match_getslice(self, key, def);
2162 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002163 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002164 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002165 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002166 status = PyDict_SetItem(result, key, value);
2167 Py_DECREF(value);
2168 if (status < 0)
2169 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002170 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002171
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002172 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002174 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002175
2176failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002177 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002178 Py_DECREF(result);
2179 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002180}
2181
2182static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002183match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002184{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002185 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002186
Guido van Rossumddefaf32007-01-14 03:31:43 +00002187 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002188 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002189 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002190
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002191 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002192
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002193 if (index < 0 || index >= self->groups) {
2194 PyErr_SetString(
2195 PyExc_IndexError,
2196 "no such group"
2197 );
2198 return NULL;
2199 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002200
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002201 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002202 return PyLong_FromSsize_t(self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002203}
2204
2205static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002206match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002207{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002208 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002209
Guido van Rossumddefaf32007-01-14 03:31:43 +00002210 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002211 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002212 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002213
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002214 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002215
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002216 if (index < 0 || index >= self->groups) {
2217 PyErr_SetString(
2218 PyExc_IndexError,
2219 "no such group"
2220 );
2221 return NULL;
2222 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002223
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002224 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002225 return PyLong_FromSsize_t(self->mark[index*2+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002226}
2227
2228LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002229_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002230{
2231 PyObject* pair;
2232 PyObject* item;
2233
2234 pair = PyTuple_New(2);
2235 if (!pair)
2236 return NULL;
2237
Christian Heimes217cfd12007-12-02 14:31:20 +00002238 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002239 if (!item)
2240 goto error;
2241 PyTuple_SET_ITEM(pair, 0, item);
2242
Christian Heimes217cfd12007-12-02 14:31:20 +00002243 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002244 if (!item)
2245 goto error;
2246 PyTuple_SET_ITEM(pair, 1, item);
2247
2248 return pair;
2249
2250 error:
2251 Py_DECREF(pair);
2252 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002253}
2254
2255static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002256match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002257{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002258 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002259
Guido van Rossumddefaf32007-01-14 03:31:43 +00002260 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002261 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002262 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002263
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002264 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002265
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002266 if (index < 0 || index >= self->groups) {
2267 PyErr_SetString(
2268 PyExc_IndexError,
2269 "no such group"
2270 );
2271 return NULL;
2272 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002273
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002274 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002275 return _pair(self->mark[index*2], self->mark[index*2+1]);
2276}
2277
2278static PyObject*
2279match_regs(MatchObject* self)
2280{
2281 PyObject* regs;
2282 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002283 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002284
2285 regs = PyTuple_New(self->groups);
2286 if (!regs)
2287 return NULL;
2288
2289 for (index = 0; index < self->groups; index++) {
2290 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2291 if (!item) {
2292 Py_DECREF(regs);
2293 return NULL;
2294 }
2295 PyTuple_SET_ITEM(regs, index, item);
2296 }
2297
2298 Py_INCREF(regs);
2299 self->regs = regs;
2300
2301 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002302}
2303
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002304static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002305match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002306{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002307#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002308 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002309 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002310
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002311 slots = 2 * (self->pattern->groups+1);
2312
2313 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2314 if (!copy)
2315 return NULL;
2316
2317 /* this value a constant, but any compiler should be able to
2318 figure that out all by itself */
2319 offset = offsetof(MatchObject, string);
2320
2321 Py_XINCREF(self->pattern);
2322 Py_XINCREF(self->string);
2323 Py_XINCREF(self->regs);
2324
2325 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002326 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002327
2328 return (PyObject*) copy;
2329#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002330 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002331 return NULL;
2332#endif
2333}
2334
2335static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002336match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002337{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002338#ifdef USE_BUILTIN_COPY
2339 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002340
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002341 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002342 if (!copy)
2343 return NULL;
2344
2345 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2346 !deepcopy(&copy->string, memo) ||
2347 !deepcopy(&copy->regs, memo)) {
2348 Py_DECREF(copy);
2349 return NULL;
2350 }
2351
2352#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002353 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2354 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002355#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002356}
2357
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002358PyDoc_STRVAR(match_doc,
2359"The result of re.match() and re.search().\n\
2360Match objects always have a boolean value of True.");
2361
2362PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002363"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002364 Return subgroup(s) of the match by indices or names.\n\
2365 For 0 returns the entire match.");
2366
2367PyDoc_STRVAR(match_start_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002368"start([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002369 Return index of the start of the substring matched by group.");
2370
2371PyDoc_STRVAR(match_end_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002372"end([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002373 Return index of the end of the substring matched by group.");
2374
2375PyDoc_STRVAR(match_span_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002376"span([group]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002377 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
2378
2379PyDoc_STRVAR(match_groups_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002380"groups([default=None]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002381 Return a tuple containing all the subgroups of the match, from 1.\n\
2382 The default argument is used for groups\n\
2383 that did not participate in the match");
2384
2385PyDoc_STRVAR(match_groupdict_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002386"groupdict([default=None]) -> dict.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002387 Return a dictionary containing all the named subgroups of the match,\n\
2388 keyed by the subgroup name. The default argument is used for groups\n\
2389 that did not participate in the match");
2390
2391PyDoc_STRVAR(match_expand_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002392"expand(template) -> str.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002393 Return the string obtained by doing backslash substitution\n\
2394 on the string template, as done by the sub() method.");
2395
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002396static PyMethodDef match_methods[] = {
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002397 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2398 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
2399 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
2400 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
2401 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
2402 match_groups_doc},
2403 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
2404 match_groupdict_doc},
2405 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002406 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
2407 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002408 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002409};
2410
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002411static PyObject *
2412match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002413{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002414 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002415 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002416 Py_INCREF(Py_None);
2417 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002418}
2419
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002420static PyObject *
2421match_lastgroup_get(MatchObject *self)
2422{
2423 if (self->pattern->indexgroup && self->lastindex >= 0) {
2424 PyObject* result = PySequence_GetItem(
2425 self->pattern->indexgroup, self->lastindex
2426 );
2427 if (result)
2428 return result;
2429 PyErr_Clear();
2430 }
2431 Py_INCREF(Py_None);
2432 return Py_None;
2433}
2434
2435static PyObject *
2436match_regs_get(MatchObject *self)
2437{
2438 if (self->regs) {
2439 Py_INCREF(self->regs);
2440 return self->regs;
2441 } else
2442 return match_regs(self);
2443}
2444
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002445static PyObject *
2446match_repr(MatchObject *self)
2447{
2448 PyObject *result;
2449 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2450 if (group0 == NULL)
2451 return NULL;
2452 result = PyUnicode_FromFormat(
2453 "<%s object; span=(%d, %d), match=%.50R>",
2454 Py_TYPE(self)->tp_name,
2455 self->mark[0], self->mark[1], group0);
2456 Py_DECREF(group0);
2457 return result;
2458}
2459
2460
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002461static PyGetSetDef match_getset[] = {
2462 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2463 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2464 {"regs", (getter)match_regs_get, (setter)NULL},
2465 {NULL}
2466};
2467
2468#define MATCH_OFF(x) offsetof(MatchObject, x)
2469static PyMemberDef match_members[] = {
2470 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2471 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2472 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2473 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2474 {NULL}
2475};
2476
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2479
Neal Norwitz57c179c2006-03-22 07:18:02 +00002480static PyTypeObject Match_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002481 PyVarObject_HEAD_INIT(NULL,0)
2482 "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002483 sizeof(MatchObject), sizeof(Py_ssize_t),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002484 (destructor)match_dealloc, /* tp_dealloc */
2485 0, /* tp_print */
2486 0, /* tp_getattr */
2487 0, /* tp_setattr */
2488 0, /* tp_reserved */
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002489 (reprfunc)match_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002490 0, /* tp_as_number */
2491 0, /* tp_as_sequence */
2492 0, /* tp_as_mapping */
2493 0, /* tp_hash */
2494 0, /* tp_call */
2495 0, /* tp_str */
2496 0, /* tp_getattro */
2497 0, /* tp_setattro */
2498 0, /* tp_as_buffer */
2499 Py_TPFLAGS_DEFAULT, /* tp_flags */
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002500 match_doc, /* tp_doc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002501 0, /* tp_traverse */
2502 0, /* tp_clear */
2503 0, /* tp_richcompare */
2504 0, /* tp_weaklistoffset */
2505 0, /* tp_iter */
2506 0, /* tp_iternext */
2507 match_methods, /* tp_methods */
2508 match_members, /* tp_members */
2509 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00002510};
2511
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002512static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002513pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002514{
2515 /* create match object (from state object) */
2516
2517 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002518 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002519 char* base;
2520 int n;
2521
2522 if (status > 0) {
2523
2524 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002525 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002526 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2527 2*(pattern->groups+1));
2528 if (!match)
2529 return NULL;
2530
2531 Py_INCREF(pattern);
2532 match->pattern = pattern;
2533
2534 Py_INCREF(state->string);
2535 match->string = state->string;
2536
2537 match->regs = NULL;
2538 match->groups = pattern->groups+1;
2539
2540 /* fill in group slices */
2541
2542 base = (char*) state->beginning;
2543 n = state->charsize;
2544
2545 match->mark[0] = ((char*) state->start - base) / n;
2546 match->mark[1] = ((char*) state->ptr - base) / n;
2547
2548 for (i = j = 0; i < pattern->groups; i++, j+=2)
2549 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2550 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2551 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2552 } else
2553 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2554
2555 match->pos = state->pos;
2556 match->endpos = state->endpos;
2557
2558 match->lastindex = state->lastindex;
2559
2560 return (PyObject*) match;
2561
2562 } else if (status == 0) {
2563
2564 /* no match */
2565 Py_INCREF(Py_None);
2566 return Py_None;
2567
2568 }
2569
2570 /* internal error */
2571 pattern_error(status);
2572 return NULL;
2573}
2574
2575
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002576/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002577/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002578
2579static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002580scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002581{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002582 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002583 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002584 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002585}
2586
2587static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002588scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002589{
2590 SRE_STATE* state = &self->state;
2591 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002592 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002593
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002594 state_reset(state);
2595
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002596 state->ptr = state->start;
2597
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002598 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002599 if (PyErr_Occurred())
2600 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002601
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002602 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002603 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002604
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002605 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002606 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002607 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002608 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002609
2610 return match;
2611}
2612
2613
2614static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002615scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002616{
2617 SRE_STATE* state = &self->state;
2618 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002619 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002620
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002621 state_reset(state);
2622
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002623 state->ptr = state->start;
2624
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002625 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002626 if (PyErr_Occurred())
2627 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002628
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002629 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002630 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002631
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002632 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002633 state->start = (void*) ((char*) state->ptr + state->charsize);
2634 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002635 state->start = state->ptr;
2636
2637 return match;
2638}
2639
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002640static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002641 {"match", (PyCFunction) scanner_match, METH_NOARGS},
2642 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002643 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002644};
2645
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002646#define SCAN_OFF(x) offsetof(ScannerObject, x)
2647static PyMemberDef scanner_members[] = {
Ezio Melotti7c8c1ea2011-09-29 01:00:19 +03002648 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002649 {NULL} /* Sentinel */
2650};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002651
Neal Norwitz57c179c2006-03-22 07:18:02 +00002652static PyTypeObject Scanner_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002653 PyVarObject_HEAD_INIT(NULL, 0)
2654 "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002655 sizeof(ScannerObject), 0,
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002656 (destructor)scanner_dealloc,/* tp_dealloc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002657 0, /* tp_print */
2658 0, /* tp_getattr */
2659 0, /* tp_setattr */
2660 0, /* tp_reserved */
2661 0, /* tp_repr */
2662 0, /* tp_as_number */
2663 0, /* tp_as_sequence */
2664 0, /* tp_as_mapping */
2665 0, /* tp_hash */
2666 0, /* tp_call */
2667 0, /* tp_str */
2668 0, /* tp_getattro */
2669 0, /* tp_setattro */
2670 0, /* tp_as_buffer */
2671 Py_TPFLAGS_DEFAULT, /* tp_flags */
2672 0, /* tp_doc */
2673 0, /* tp_traverse */
2674 0, /* tp_clear */
2675 0, /* tp_richcompare */
2676 0, /* tp_weaklistoffset */
2677 0, /* tp_iter */
2678 0, /* tp_iternext */
2679 scanner_methods, /* tp_methods */
2680 scanner_members, /* tp_members */
2681 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002682};
2683
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002684static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002685pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kw)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002686{
2687 /* create search state object */
2688
2689 ScannerObject* self;
2690
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02002691 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002692 Py_ssize_t start = 0;
2693 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02002694 static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
2695 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:scanner", kwlist,
2696 &string, &start, &end, &string2))
2697 return NULL;
2698
2699 string = fix_string_param(string, string2, "source");
2700 if (!string)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002701 return NULL;
2702
2703 /* create scanner object */
2704 self = PyObject_NEW(ScannerObject, &Scanner_Type);
2705 if (!self)
2706 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002707 self->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002708
2709 string = state_init(&self->state, pattern, string, start, end);
2710 if (!string) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002711 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002712 return NULL;
2713 }
2714
2715 Py_INCREF(pattern);
2716 self->pattern = (PyObject*) pattern;
2717
2718 return (PyObject*) self;
2719}
2720
Guido van Rossumb700df92000-03-31 14:59:30 +00002721static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002722 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002723 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00002724 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002725 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002726};
2727
Martin v. Löwis1a214512008-06-11 05:26:20 +00002728static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002729 PyModuleDef_HEAD_INIT,
2730 "_" SRE_MODULE,
2731 NULL,
2732 -1,
2733 _functions,
2734 NULL,
2735 NULL,
2736 NULL,
2737 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002738};
2739
2740PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002741{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002742 PyObject* m;
2743 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002744 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002745
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002746 /* Patch object types */
2747 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2748 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002749 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002750
Martin v. Löwis1a214512008-06-11 05:26:20 +00002751 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002752 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002753 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002754 d = PyModule_GetDict(m);
2755
Christian Heimes217cfd12007-12-02 14:31:20 +00002756 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002757 if (x) {
2758 PyDict_SetItemString(d, "MAGIC", x);
2759 Py_DECREF(x);
2760 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002761
Christian Heimes217cfd12007-12-02 14:31:20 +00002762 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002763 if (x) {
2764 PyDict_SetItemString(d, "CODESIZE", x);
2765 Py_DECREF(x);
2766 }
2767
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002768 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2769 if (x) {
2770 PyDict_SetItemString(d, "MAXREPEAT", x);
2771 Py_DECREF(x);
2772 }
2773
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002774 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2775 if (x) {
2776 PyDict_SetItemString(d, "MAXGROUPS", x);
2777 Py_DECREF(x);
2778 }
2779
Neal Norwitzfe537132007-08-26 03:55:15 +00002780 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002781 if (x) {
2782 PyDict_SetItemString(d, "copyright", x);
2783 Py_DECREF(x);
2784 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002785 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002786}
2787
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002788/* vim:ts=4:sw=4:et
2789*/