blob: 63778f4e6bfb134aa2f5ca92cfc3e8153a36940b [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
/* engine version and copyright banner */
static char copyright[]
    = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
/* Name of this module without its leading underscore.  A definition
   supplied before this point takes precedence over the default. */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

/* NOTE(review): presumably the name of the Python-level front-end
   module; confirm against the code that uses this macro. */
#define SRE_PY_MODULE "re"
58
/* Tracing switch: TRACE() only produces output when VERBOSE is
   defined.  It is explicitly undefined here, so tracing is off. */
#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
/* -------------------------------------------------------------------- */
/* optional features (compile-time switches) */

/* fast searching: enabled */
#define USE_FAST_SEARCH

/* copy/deepcopy handling (work in progress): disabled */
#undef USE_BUILTIN_COPY

/* -------------------------------------------------------------------- */
72
/* LOCAL(type) is the declaration prefix used for file-private helper
   functions; the exact storage/inline/calling-convention keywords
   depend on the compiler. */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
83
/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL         (-1)  /* illegal opcode */
#define SRE_ERROR_STATE           (-2)  /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3)  /* runaway recursion */
#define SRE_ERROR_MEMORY          (-9)  /* out of memory */
#define SRE_ERROR_INTERRUPTED     (-10) /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000090
/* TRACE((fmt, ...)) takes one parenthesised printf argument list; it
   expands to a printf call when VERBOSE is defined and to nothing
   otherwise (so its arguments are not evaluated). */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
96
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000097/* -------------------------------------------------------------------- */
98/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000099
/* ASCII-only character predicates: code points >= 128 never match,
   and only '\n' counts as a line break */
#define SRE_IS_DIGIT(ch)     ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)     ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_ALNUM(ch)     ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch)      ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +0000110
/* ASCII lower-casing: code points >= 128 are returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
115
/* ASCII upper-casing: code points >= 128 are returned unchanged */
static unsigned int sre_upper(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOUPPER(ch);
}
120
/* locale-specific character predicates */
/* The range test is written as a bit mask: for any unsigned c,
 * (c & ~N) is zero exactly when c < N+1.  Unlike a comparison, the
 * mask does not trigger warnings when c's type cannot hold values
 * of N+1 or more. */
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) ((ch) == '_' ? 1 : SRE_LOC_IS_ALNUM((ch)) != 0)
126
/* lower-case via the C library's tolower(); values >= 256 are
   returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower((ch));
}
131
/* upper-case via the C library's toupper(); values >= 256 are
   returned unchanged */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper((ch));
}
136
/* unicode-specific character predicates: aliases for the
   Py_UNICODE_* classification macros (digits use ISDECIMAL) */

#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
/* a word character is an alphanumeric or an underscore */
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000144
/* lower-case a code point with Py_UNICODE_TOLOWER */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
149
/* upper-case a code point with Py_UNICODE_TOUPPER */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
154
Guido van Rossumb700df92000-03-31 14:59:30 +0000155LOCAL(int)
156sre_category(SRE_CODE category, unsigned int ch)
157{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000158 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000160 case SRE_CATEGORY_DIGIT:
161 return SRE_IS_DIGIT(ch);
162 case SRE_CATEGORY_NOT_DIGIT:
163 return !SRE_IS_DIGIT(ch);
164 case SRE_CATEGORY_SPACE:
165 return SRE_IS_SPACE(ch);
166 case SRE_CATEGORY_NOT_SPACE:
167 return !SRE_IS_SPACE(ch);
168 case SRE_CATEGORY_WORD:
169 return SRE_IS_WORD(ch);
170 case SRE_CATEGORY_NOT_WORD:
171 return !SRE_IS_WORD(ch);
172 case SRE_CATEGORY_LINEBREAK:
173 return SRE_IS_LINEBREAK(ch);
174 case SRE_CATEGORY_NOT_LINEBREAK:
175 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000176
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000177 case SRE_CATEGORY_LOC_WORD:
178 return SRE_LOC_IS_WORD(ch);
179 case SRE_CATEGORY_LOC_NOT_WORD:
180 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000182 case SRE_CATEGORY_UNI_DIGIT:
183 return SRE_UNI_IS_DIGIT(ch);
184 case SRE_CATEGORY_UNI_NOT_DIGIT:
185 return !SRE_UNI_IS_DIGIT(ch);
186 case SRE_CATEGORY_UNI_SPACE:
187 return SRE_UNI_IS_SPACE(ch);
188 case SRE_CATEGORY_UNI_NOT_SPACE:
189 return !SRE_UNI_IS_SPACE(ch);
190 case SRE_CATEGORY_UNI_WORD:
191 return SRE_UNI_IS_WORD(ch);
192 case SRE_CATEGORY_UNI_NOT_WORD:
193 return !SRE_UNI_IS_WORD(ch);
194 case SRE_CATEGORY_UNI_LINEBREAK:
195 return SRE_UNI_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
197 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000198 }
199 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000200}
201
202/* helpers */
203
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000204static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000205data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000206{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000207 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000208 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000209 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000210 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000211 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000212}
213
214static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000215data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000217 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000218 minsize = state->data_stack_base+size;
219 cursize = state->data_stack_size;
220 if (cursize < minsize) {
221 void* stack;
222 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300223 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000224 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000226 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000227 return SRE_ERROR_MEMORY;
228 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000229 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000230 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000231 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000232 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000233}
234
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000235/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000236
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300237#define SRE_CHAR Py_UCS1
238#define SIZEOF_SRE_CHAR 1
239#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300240#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000241
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300242/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000243
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300244#define SRE_CHAR Py_UCS2
245#define SIZEOF_SRE_CHAR 2
246#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300247#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000248
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300249/* generate 32-bit unicode version */
250
251#define SRE_CHAR Py_UCS4
252#define SIZEOF_SRE_CHAR 4
253#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300254#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000255
256/* -------------------------------------------------------------------- */
257/* factories and destructors */
258
259/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100260static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600261static PyObject*pattern_scanner(PatternObject*, PyObject*, PyObject* kw);
Guido van Rossumb700df92000-03-31 14:59:30 +0000262
263static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000264sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +0000265{
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100266 return PyLong_FromSize_t(sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +0000267}
268
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000269static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +0000270sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000271{
272 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000273 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000274 return NULL;
275 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000276 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000277 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000278 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundhb389df32000-06-29 12:48:37 +0000279 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000280}
281
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000282LOCAL(void)
283state_reset(SRE_STATE* state)
284{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000285 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000286 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000287
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000288 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000289 state->lastindex = -1;
290
291 state->repeat = NULL;
292
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000293 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000294}
295
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000296static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200297getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300298 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600299 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000300{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000301 /* given a python object, return a data pointer, a length (in
302 characters), and a character size. return NULL if the object
303 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000304
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000305 /* Unicode objects do not support the buffer API. So, get the data
306 directly instead. */
307 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200308 if (PyUnicode_READY(string) == -1)
309 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200310 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200311 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300312 *p_isbytes = 0;
313 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000314 }
315
Victor Stinner0058b862011-09-29 03:27:47 +0200316 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300317 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
318 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
319 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000320 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000321
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300322 *p_length = view->len;
323 *p_charsize = 1;
324 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000325
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300326 if (view->buf == NULL) {
327 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
328 PyBuffer_Release(view);
329 view->buf = NULL;
330 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000331 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300332 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000333}
334
335LOCAL(PyObject*)
336state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000337 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000338{
339 /* prepare state object */
340
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000341 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300342 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000343 void* ptr;
344
345 memset(state, 0, sizeof(SRE_STATE));
346
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300347 state->mark = PyMem_New(void *, pattern->groups * 2);
348 if (!state->mark) {
349 PyErr_NoMemory();
350 goto err;
351 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000352 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000353 state->lastindex = -1;
354
Benjamin Petersone48944b2012-03-07 14:50:25 -0600355 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300356 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000357 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600358 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000359
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300360 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600361 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300362 "can't use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600363 goto err;
364 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300365 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600366 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300367 "can't use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600368 goto err;
369 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000370
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000371 /* adjust boundaries */
372 if (start < 0)
373 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000374 else if (start > length)
375 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000376
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000377 if (end < 0)
378 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000379 else if (end > length)
380 end = length;
381
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000383 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000384
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000385 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000386
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000387 state->start = (void*) ((char*) ptr + start * state->charsize);
388 state->end = (void*) ((char*) ptr + end * state->charsize);
389
390 Py_INCREF(string);
391 state->string = string;
392 state->pos = start;
393 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000394
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200395 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000396 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200397 state->upper = sre_upper_locale;
398 }
399 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000400 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200401 state->upper = sre_upper_unicode;
402 }
403 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000404 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200405 state->upper = sre_upper;
406 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000407
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000408 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600409 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300410 PyMem_Del(state->mark);
411 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600412 if (state->buffer.buf)
413 PyBuffer_Release(&state->buffer);
414 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000415}
416
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000417LOCAL(void)
418state_fini(SRE_STATE* state)
419{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600420 if (state->buffer.buf)
421 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000422 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000423 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300424 PyMem_Del(state->mark);
425 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000426}
427
/* Convert a pointer into the subject string to a character offset
   from its start: the byte distance from state->beginning divided by
   the character size. */
#define STATE_OFFSET(state, member) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
431
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000432LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300433getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300434 PyObject* string, Py_ssize_t start, Py_ssize_t end)
435{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300436 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300437 if (PyBytes_CheckExact(string) &&
438 start == 0 && end == PyBytes_GET_SIZE(string)) {
439 Py_INCREF(string);
440 return string;
441 }
442 return PyBytes_FromStringAndSize(
443 (const char *)ptr + start, end - start);
444 }
445 else {
446 return PyUnicode_Substring(string, start, end);
447 }
448}
449
450LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000451state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000452{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000453 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000454
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000455 index = (index - 1) * 2;
456
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000457 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000458 if (empty)
459 /* want empty string */
460 i = j = 0;
461 else {
462 Py_INCREF(Py_None);
463 return Py_None;
464 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000465 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000466 i = STATE_OFFSET(state, state->mark[index]);
467 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000468 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000469
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300470 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000471}
472
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000473static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100474pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000475{
476 switch (status) {
477 case SRE_ERROR_RECURSION_LIMIT:
478 PyErr_SetString(
479 PyExc_RuntimeError,
480 "maximum recursion limit exceeded"
481 );
482 break;
483 case SRE_ERROR_MEMORY:
484 PyErr_NoMemory();
485 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000486 case SRE_ERROR_INTERRUPTED:
487 /* An exception has already been raised, so let it fly */
488 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000489 default:
490 /* other error codes indicate compiler/engine bugs */
491 PyErr_SetString(
492 PyExc_RuntimeError,
493 "internal error in regular expression engine"
494 );
495 }
496}
497
Guido van Rossumb700df92000-03-31 14:59:30 +0000498static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000499pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000500{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000501 if (self->weakreflist != NULL)
502 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000503 Py_XDECREF(self->pattern);
504 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000505 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000506 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000507}
508
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300509LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300510sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300511{
512 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300513 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300514 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300515 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300516 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300517 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300518}
519
520LOCAL(Py_ssize_t)
521sre_search(SRE_STATE* state, SRE_CODE* pattern)
522{
523 if (state->charsize == 1)
524 return sre_ucs1_search(state, pattern);
525 if (state->charsize == 2)
526 return sre_ucs2_search(state, pattern);
527 assert(state->charsize == 4);
528 return sre_ucs4_search(state, pattern);
529}
530
Larry Hastings16c51912014-01-07 11:53:01 -0800531static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200532fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
533{
534 if (string2 != NULL) {
535 if (string != NULL) {
536 PyErr_Format(PyExc_TypeError,
537 "Argument given by name ('%s') and position (1)",
538 oldname);
539 return NULL;
540 }
541 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
542 "The '%s' keyword parameter name is deprecated. "
543 "Use 'string' instead.", oldname) < 0)
544 return NULL;
545 return string2;
546 }
547 if (string == NULL) {
548 PyErr_SetString(PyExc_TypeError,
549 "Required argument 'string' (pos 1) not found");
550 return NULL;
551 }
552 return string;
553}
Larry Hastings16c51912014-01-07 11:53:01 -0800554
555static PyObject *
Larry Hastings5c661892014-01-24 06:17:25 -0800556pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
Larry Hastings16c51912014-01-07 11:53:01 -0800557{
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200558 static char *_keywords[] = {"string", "pos", "endpos", "pattern", NULL};
559 PyObject *string = NULL;
Larry Hastings16c51912014-01-07 11:53:01 -0800560 Py_ssize_t pos = 0;
561 Py_ssize_t endpos = PY_SSIZE_T_MAX;
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200562 PyObject *pattern = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000563 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100564 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300565 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000566
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200567 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
568 "|Onn$O:match", _keywords,
569 &string, &pos, &endpos, &pattern))
570 return NULL;
571 string = fix_string_param(string, pattern, "pattern");
572 if (!string)
573 return NULL;
574 string = state_init(&state, (PatternObject *)self, string, pos, endpos);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 if (!string)
576 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000577
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000578 state.ptr = state.start;
579
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000580 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
581
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300582 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000583
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000584 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300585 if (PyErr_Occurred()) {
586 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000587 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300588 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000589
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300590 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000591 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300592 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000593}
594
595static PyObject*
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200596pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
597{
598 SRE_STATE state;
599 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300600 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200601
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200602 PyObject *string = NULL, *string2 = NULL;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200603 Py_ssize_t start = 0;
604 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200605 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200606 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:fullmatch", kwlist,
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200607 &string, &start, &end, &string2))
608 return NULL;
609
610 string = fix_string_param(string, string2, "pattern");
611 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200612 return NULL;
613
614 string = state_init(&state, self, string, start, end);
615 if (!string)
616 return NULL;
617
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200618 state.ptr = state.start;
619
620 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
621
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300622 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200623
624 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300625 if (PyErr_Occurred()) {
626 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200627 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300628 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200629
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300630 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200631 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300632 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200633}
634
635static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000636pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000637{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000638 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100639 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300640 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000641
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200642 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000643 Py_ssize_t start = 0;
644 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200645 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
646 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:search", kwlist,
647 &string, &start, &end, &string2))
648 return NULL;
649
650 string = fix_string_param(string, string2, "pattern");
651 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000652 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000653
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000654 string = state_init(&state, self, string, start, end);
655 if (!string)
656 return NULL;
657
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000658 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
659
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300660 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000661
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000662 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
663
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300664 if (PyErr_Occurred()) {
665 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000666 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300667 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000668
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300669 match = pattern_new_match(self, &state, status);
670 state_fini(&state);
671 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000672}
673
674static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000675call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000676{
677 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000678 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000679 PyObject* func;
680 PyObject* result;
681
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000682 if (!args)
683 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000684 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000685 if (!name)
686 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000687 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000688 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000689 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000690 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000691 func = PyObject_GetAttrString(mod, function);
692 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000693 if (!func)
694 return NULL;
695 result = PyObject_CallObject(func, args);
696 Py_DECREF(func);
697 Py_DECREF(args);
698 return result;
699}
700
#ifdef USE_BUILTIN_COPY
/* Replace *object with copy.deepcopy(*object, memo).
 *
 * On success the old reference held in *object is released and the slot
 * holds the new copy.  A NULL slot is left untouched and counts as success:
 * pattern_copy() treats these fields as optional (Py_XINCREF), and packing
 * a NULL into the argument tuple would crash.
 *
 * Returns 1 on success, 0 on failure with an exception set. */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* copy;

    if (*object == NULL)
        return 1; /* nothing to copy */

    copy = call(
        "copy", "deepcopy",
        PyTuple_Pack(2, *object, memo)
        );
    if (!copy)
        return 0;

    Py_DECREF(*object);
    *object = copy;

    return 1; /* success */
}
#endif
720
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000722pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000723{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000724 SRE_STATE state;
725 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100726 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000727 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000728
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200729 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000730 Py_ssize_t start = 0;
731 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200732 static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
733 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:findall", kwlist,
734 &string, &start, &end, &string2))
735 return NULL;
736
737 string = fix_string_param(string, string2, "source");
738 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000740
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000741 string = state_init(&state, self, string, start, end);
742 if (!string)
743 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000744
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000746 if (!list) {
747 state_fini(&state);
748 return NULL;
749 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000750
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000751 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000752
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000753 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000754
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000755 state_reset(&state);
756
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 state.ptr = state.start;
758
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300759 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300760 if (PyErr_Occurred())
761 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000762
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000763 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000764 if (status == 0)
765 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000766 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000767 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000768 }
Tim Peters3d563502006-01-21 02:47:53 +0000769
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000770 /* don't bother to build a match object */
771 switch (self->groups) {
772 case 0:
773 b = STATE_OFFSET(&state, state.start);
774 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300775 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300776 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000777 if (!item)
778 goto error;
779 break;
780 case 1:
781 item = state_getslice(&state, 1, string, 1);
782 if (!item)
783 goto error;
784 break;
785 default:
786 item = PyTuple_New(self->groups);
787 if (!item)
788 goto error;
789 for (i = 0; i < self->groups; i++) {
790 PyObject* o = state_getslice(&state, i+1, string, 1);
791 if (!o) {
792 Py_DECREF(item);
793 goto error;
794 }
795 PyTuple_SET_ITEM(item, i, o);
796 }
797 break;
798 }
799
800 status = PyList_Append(list, item);
801 Py_DECREF(item);
802 if (status < 0)
803 goto error;
804
805 if (state.ptr == state.start)
806 state.start = (void*) ((char*) state.ptr + state.charsize);
807 else
808 state.start = state.ptr;
809
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000810 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000811
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000812 state_fini(&state);
813 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000814
815error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000816 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000817 state_fini(&state);
818 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000819
Guido van Rossumb700df92000-03-31 14:59:30 +0000820}
821
Fredrik Lundh703ce812001-10-24 22:16:30 +0000822static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600823pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
Fredrik Lundh703ce812001-10-24 22:16:30 +0000824{
825 PyObject* scanner;
826 PyObject* search;
827 PyObject* iterator;
828
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600829 scanner = pattern_scanner(pattern, args, kw);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000830 if (!scanner)
831 return NULL;
832
833 search = PyObject_GetAttrString(scanner, "search");
834 Py_DECREF(scanner);
835 if (!search)
836 return NULL;
837
838 iterator = PyCallIter_New(search, Py_None);
839 Py_DECREF(search);
840
841 return iterator;
842}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000843
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000844static PyObject*
845pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
846{
847 SRE_STATE state;
848 PyObject* list;
849 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100850 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000851 Py_ssize_t n;
852 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000853 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000854
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200855 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000856 Py_ssize_t maxsplit = 0;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200857 static char* kwlist[] = { "string", "maxsplit", "source", NULL };
858 if (!PyArg_ParseTupleAndKeywords(args, kw, "|On$O:split", kwlist,
859 &string, &maxsplit, &string2))
860 return NULL;
861
862 string = fix_string_param(string, string2, "source");
863 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000864 return NULL;
865
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000866 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000867 if (!string)
868 return NULL;
869
870 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000871 if (!list) {
872 state_fini(&state);
873 return NULL;
874 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000875
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000876 n = 0;
877 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000878
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000879 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000880
881 state_reset(&state);
882
883 state.ptr = state.start;
884
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300885 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300886 if (PyErr_Occurred())
887 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000888
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000889 if (status <= 0) {
890 if (status == 0)
891 break;
892 pattern_error(status);
893 goto error;
894 }
Tim Peters3d563502006-01-21 02:47:53 +0000895
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000896 if (state.start == state.ptr) {
897 if (last == state.end)
898 break;
899 /* skip one character */
900 state.start = (void*) ((char*) state.ptr + state.charsize);
901 continue;
902 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000903
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000904 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300905 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000906 string, STATE_OFFSET(&state, last),
907 STATE_OFFSET(&state, state.start)
908 );
909 if (!item)
910 goto error;
911 status = PyList_Append(list, item);
912 Py_DECREF(item);
913 if (status < 0)
914 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000915
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000916 /* add groups (if any) */
917 for (i = 0; i < self->groups; i++) {
918 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000919 if (!item)
920 goto error;
921 status = PyList_Append(list, item);
922 Py_DECREF(item);
923 if (status < 0)
924 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000925 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000926
927 n = n + 1;
928
929 last = state.start = state.ptr;
930
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000931 }
932
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000933 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300934 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000935 string, STATE_OFFSET(&state, last), state.endpos
936 );
937 if (!item)
938 goto error;
939 status = PyList_Append(list, item);
940 Py_DECREF(item);
941 if (status < 0)
942 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000943
944 state_fini(&state);
945 return list;
946
947error:
948 Py_DECREF(list);
949 state_fini(&state);
950 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000951
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000952}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000953
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000954static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000955pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000956 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000957{
958 SRE_STATE state;
959 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300960 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000961 PyObject* item;
962 PyObject* filter;
963 PyObject* args;
964 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000965 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100966 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000967 Py_ssize_t n;
968 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300969 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000970 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600971 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000972
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000973 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000974 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000975 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000976 Py_INCREF(filter);
977 filter_is_callable = 1;
978 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000979 /* if not callable, check if it's a literal string */
980 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600981 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300982 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200983 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000984 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300985 if (charsize == 1)
986 literal = memchr(ptr, '\\', n) == NULL;
987 else
988 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000989 } else {
990 PyErr_Clear();
991 literal = 0;
992 }
Benjamin Petersone48944b2012-03-07 14:50:25 -0600993 if (view.buf)
994 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000995 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000996 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000997 Py_INCREF(filter);
998 filter_is_callable = 0;
999 } else {
1000 /* not a literal; hand it over to the template compiler */
1001 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001002 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001003 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001004 );
1005 if (!filter)
1006 return NULL;
1007 filter_is_callable = PyCallable_Check(filter);
1008 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001009 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001010
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001011 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001012 if (!string) {
1013 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001014 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001015 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001016
1017 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001018 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001019 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001020 state_fini(&state);
1021 return NULL;
1022 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001023
1024 n = i = 0;
1025
1026 while (!count || n < count) {
1027
1028 state_reset(&state);
1029
1030 state.ptr = state.start;
1031
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001032 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001033 if (PyErr_Occurred())
1034 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001035
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001036 if (status <= 0) {
1037 if (status == 0)
1038 break;
1039 pattern_error(status);
1040 goto error;
1041 }
Tim Peters3d563502006-01-21 02:47:53 +00001042
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001043 b = STATE_OFFSET(&state, state.start);
1044 e = STATE_OFFSET(&state, state.ptr);
1045
1046 if (i < b) {
1047 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001048 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001049 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001050 if (!item)
1051 goto error;
1052 status = PyList_Append(list, item);
1053 Py_DECREF(item);
1054 if (status < 0)
1055 goto error;
1056
1057 } else if (i == b && i == e && n > 0)
1058 /* ignore empty match on latest position */
1059 goto next;
1060
1061 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001062 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001063 match = pattern_new_match(self, &state, 1);
1064 if (!match)
1065 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001066 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001067 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001068 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001069 goto error;
1070 }
1071 item = PyObject_CallObject(filter, args);
1072 Py_DECREF(args);
1073 Py_DECREF(match);
1074 if (!item)
1075 goto error;
1076 } else {
1077 /* filter is literal string */
1078 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001079 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001080 }
1081
1082 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001083 if (item != Py_None) {
1084 status = PyList_Append(list, item);
1085 Py_DECREF(item);
1086 if (status < 0)
1087 goto error;
1088 }
Tim Peters3d563502006-01-21 02:47:53 +00001089
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001090 i = e;
1091 n = n + 1;
1092
1093next:
1094 /* move on */
1095 if (state.ptr == state.start)
1096 state.start = (void*) ((char*) state.ptr + state.charsize);
1097 else
1098 state.start = state.ptr;
1099
1100 }
1101
1102 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001103 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001104 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001105 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001106 if (!item)
1107 goto error;
1108 status = PyList_Append(list, item);
1109 Py_DECREF(item);
1110 if (status < 0)
1111 goto error;
1112 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001113
1114 state_fini(&state);
1115
Guido van Rossum4e173842001-12-07 04:25:10 +00001116 Py_DECREF(filter);
1117
Fredrik Lundhdac58492001-10-21 21:48:30 +00001118 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001119 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001120 if (!joiner) {
1121 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001122 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001123 }
1124 if (PyList_GET_SIZE(list) == 0) {
1125 Py_DECREF(list);
1126 item = joiner;
1127 }
1128 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001129 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001130 item = _PyBytes_Join(joiner, list);
1131 else
1132 item = PyUnicode_Join(joiner, list);
1133 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001134 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001135 if (!item)
1136 return NULL;
1137 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001138
1139 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001140 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001141
1142 return item;
1143
1144error:
1145 Py_DECREF(list);
1146 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001147 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001148 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001149
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001150}
1151
1152static PyObject*
1153pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
1154{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001155 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001156 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001157 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001158 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001159 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001160 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001161 return NULL;
1162
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001163 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001164}
1165
1166static PyObject*
1167pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
1168{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001169 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001170 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001171 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001172 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001173 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001174 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001175 return NULL;
1176
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001177 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001178}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001179
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001180static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001181pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001182{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001183#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001184 PatternObject* copy;
1185 int offset;
1186
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001187 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1188 if (!copy)
1189 return NULL;
1190
1191 offset = offsetof(PatternObject, groups);
1192
1193 Py_XINCREF(self->groupindex);
1194 Py_XINCREF(self->indexgroup);
1195 Py_XINCREF(self->pattern);
1196
1197 memcpy((char*) copy + offset, (char*) self + offset,
1198 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001199 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001200
1201 return (PyObject*) copy;
1202#else
1203 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1204 return NULL;
1205#endif
1206}
1207
1208static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001209pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001210{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001211#ifdef USE_BUILTIN_COPY
1212 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001213
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001214 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001215 if (!copy)
1216 return NULL;
1217
1218 if (!deepcopy(&copy->groupindex, memo) ||
1219 !deepcopy(&copy->indexgroup, memo) ||
1220 !deepcopy(&copy->pattern, memo)) {
1221 Py_DECREF(copy);
1222 return NULL;
1223 }
1224
1225#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001226 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1227 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001228#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001229}
1230
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001231static PyObject *
1232pattern_repr(PatternObject *obj)
1233{
1234 static const struct {
1235 const char *name;
1236 int value;
1237 } flag_names[] = {
1238 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1239 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1240 {"re.LOCALE", SRE_FLAG_LOCALE},
1241 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1242 {"re.DOTALL", SRE_FLAG_DOTALL},
1243 {"re.UNICODE", SRE_FLAG_UNICODE},
1244 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1245 {"re.DEBUG", SRE_FLAG_DEBUG},
1246 {"re.ASCII", SRE_FLAG_ASCII},
1247 };
1248 PyObject *result = NULL;
1249 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001250 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001251 int flags = obj->flags;
1252
1253 /* Omit re.UNICODE for valid string patterns. */
1254 if (obj->isbytes == 0 &&
1255 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1256 SRE_FLAG_UNICODE)
1257 flags &= ~SRE_FLAG_UNICODE;
1258
1259 flag_items = PyList_New(0);
1260 if (!flag_items)
1261 return NULL;
1262
1263 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1264 if (flags & flag_names[i].value) {
1265 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1266 if (!item)
1267 goto done;
1268
1269 if (PyList_Append(flag_items, item) < 0) {
1270 Py_DECREF(item);
1271 goto done;
1272 }
1273 Py_DECREF(item);
1274 flags &= ~flag_names[i].value;
1275 }
1276 }
1277 if (flags) {
1278 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1279 if (!item)
1280 goto done;
1281
1282 if (PyList_Append(flag_items, item) < 0) {
1283 Py_DECREF(item);
1284 goto done;
1285 }
1286 Py_DECREF(item);
1287 }
1288
1289 if (PyList_Size(flag_items) > 0) {
1290 PyObject *flags_result;
1291 PyObject *sep = PyUnicode_FromString("|");
1292 if (!sep)
1293 goto done;
1294 flags_result = PyUnicode_Join(sep, flag_items);
1295 Py_DECREF(sep);
1296 if (!flags_result)
1297 goto done;
1298 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1299 obj->pattern, flags_result);
1300 Py_DECREF(flags_result);
1301 }
1302 else {
1303 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1304 }
1305
1306done:
1307 Py_DECREF(flag_items);
1308 return result;
1309}
1310
Raymond Hettinger94478742004-09-24 04:31:19 +00001311PyDoc_STRVAR(pattern_match_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001312"match(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001313 Matches zero or more characters at the beginning of the string");
1314
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001315PyDoc_STRVAR(pattern_fullmatch_doc,
1316"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
1317 Matches against all of the string");
1318
Raymond Hettinger94478742004-09-24 04:31:19 +00001319PyDoc_STRVAR(pattern_search_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001320"search(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001321 Scan through string looking for a match, and return a corresponding\n\
Andrew Svetlov0b64c142012-12-25 18:48:54 +02001322 match object instance. Return None if no position in the string matches.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001323
1324PyDoc_STRVAR(pattern_split_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001325"split(string[, maxsplit = 0]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001326 Split string by the occurrences of pattern.");
1327
1328PyDoc_STRVAR(pattern_findall_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001329"findall(string[, pos[, endpos]]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001330 Return a list of all non-overlapping matches of pattern in string.");
1331
1332PyDoc_STRVAR(pattern_finditer_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001333"finditer(string[, pos[, endpos]]) -> iterator.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001334 Return an iterator over all non-overlapping matches for the \n\
1335 RE pattern in string. For each match, the iterator returns a\n\
1336 match object.");
1337
1338PyDoc_STRVAR(pattern_sub_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001339"sub(repl, string[, count = 0]) -> newstring.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001340 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00001341 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001342
1343PyDoc_STRVAR(pattern_subn_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001344"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001345 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
1346 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00001347 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001348
1349PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1350
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001351static PyMethodDef pattern_methods[] = {
Tim Peters3d563502006-01-21 02:47:53 +00001352 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001353 pattern_match_doc},
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001354 {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
1355 pattern_fullmatch_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001356 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001357 pattern_search_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001358 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001359 pattern_sub_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001360 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001361 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001362 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001363 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001364 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001365 pattern_findall_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001366 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001367 pattern_finditer_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001368 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001369 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
1370 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001371 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001372};
1373
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00001374#define PAT_OFF(x) offsetof(PatternObject, x)
1375static PyMemberDef pattern_members[] = {
1376 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
1377 {"flags", T_INT, PAT_OFF(flags), READONLY},
1378 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
1379 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
1380 {NULL} /* Sentinel */
1381};
Guido van Rossumb700df92000-03-31 14:59:30 +00001382
/* Type object for compiled patterns.  Instances are variable-sized: the
   item size is sizeof(SRE_CODE), and _compile() allocates them with
   PyObject_NEW_VAR() using the number of code words as the item count.
   Slots after tp_members are not listed and are therefore zero. */
static PyTypeObject Pattern_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_" SRE_MODULE ".SRE_Pattern",      /* tp_name */
    sizeof(PatternObject), sizeof(SRE_CODE), /* tp_basicsize, tp_itemsize */
    (destructor)pattern_dealloc,        /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    (reprfunc)pattern_repr,             /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    pattern_doc,                        /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    pattern_methods,                    /* tp_methods */
    pattern_members,                    /* tp_members */
};
1413
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001414static int _validate(PatternObject *self); /* Forward */
1415
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001416static PyObject *
1417_compile(PyObject* self_, PyObject* args)
1418{
1419 /* "compile" pattern descriptor to pattern object */
1420
1421 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001422 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001423
1424 PyObject* pattern;
1425 int flags = 0;
1426 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001427 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001428 PyObject* groupindex = NULL;
1429 PyObject* indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001430
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001431 if (!PyArg_ParseTuple(args, "OiO!nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001432 &PyList_Type, &code, &groups,
1433 &groupindex, &indexgroup))
1434 return NULL;
1435
1436 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001437 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001438 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1439 if (!self)
1440 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001441 self->weakreflist = NULL;
1442 self->pattern = NULL;
1443 self->groupindex = NULL;
1444 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001445
1446 self->codesize = n;
1447
1448 for (i = 0; i < n; i++) {
1449 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001450 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001451 self->code[i] = (SRE_CODE) value;
1452 if ((unsigned long) self->code[i] != value) {
1453 PyErr_SetString(PyExc_OverflowError,
1454 "regular expression code size limit exceeded");
1455 break;
1456 }
1457 }
1458
1459 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001460 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001461 return NULL;
1462 }
1463
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001464 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001465 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001466 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001467 else {
1468 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001469 int charsize;
1470 Py_buffer view;
1471 view.buf = NULL;
1472 if (!getstring(pattern, &p_length, &self->isbytes,
1473 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001474 Py_DECREF(self);
1475 return NULL;
1476 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001477 if (view.buf)
1478 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001479 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001480
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001481 Py_INCREF(pattern);
1482 self->pattern = pattern;
1483
1484 self->flags = flags;
1485
1486 self->groups = groups;
1487
1488 Py_XINCREF(groupindex);
1489 self->groupindex = groupindex;
1490
1491 Py_XINCREF(indexgroup);
1492 self->indexgroup = indexgroup;
1493
1494 self->weakreflist = NULL;
1495
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001496 if (!_validate(self)) {
1497 Py_DECREF(self);
1498 return NULL;
1499 }
1500
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001501 return (PyObject*) self;
1502}
1503
Guido van Rossumb700df92000-03-31 14:59:30 +00001504/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001505/* Code validation */
1506
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1528
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a fully parenthesized
   printf argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the *enclosing function*. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   All of these expect the enclosing function to have `code` and `end`
   pointers in scope plus the variable they assign (`op`, `arg` or `skip`).
   Each one FAILs if `code` has already reached `end`, otherwise reads
   *code and advances `code` by one word. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Like GET_ARG, but additionally FAILs when skip-(adj) is larger than the
   number of words from the skip word to `end`.  The subtraction is done on
   the (unsigned) skip value, so a skip smaller than adj wraps around to a
   large value instead of going negative.  `adj` is parenthesized so that
   an expression argument is subtracted as a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001569
1570static int
1571_validate_charset(SRE_CODE *code, SRE_CODE *end)
1572{
1573 /* Some variables are manipulated by the macros above */
1574 SRE_CODE op;
1575 SRE_CODE arg;
1576 SRE_CODE offset;
1577 int i;
1578
1579 while (code < end) {
1580 GET_OP;
1581 switch (op) {
1582
1583 case SRE_OP_NEGATE:
1584 break;
1585
1586 case SRE_OP_LITERAL:
1587 GET_ARG;
1588 break;
1589
1590 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001591 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001592 GET_ARG;
1593 GET_ARG;
1594 break;
1595
1596 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001597 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001598 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001599 FAIL;
1600 code += offset;
1601 break;
1602
1603 case SRE_OP_BIGCHARSET:
1604 GET_ARG; /* Number of blocks */
1605 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001606 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001607 FAIL;
1608 /* Make sure that each byte points to a valid block */
1609 for (i = 0; i < 256; i++) {
1610 if (((unsigned char *)code)[i] >= arg)
1611 FAIL;
1612 }
1613 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001614 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001615 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001616 FAIL;
1617 code += offset;
1618 break;
1619
1620 case SRE_OP_CATEGORY:
1621 GET_ARG;
1622 switch (arg) {
1623 case SRE_CATEGORY_DIGIT:
1624 case SRE_CATEGORY_NOT_DIGIT:
1625 case SRE_CATEGORY_SPACE:
1626 case SRE_CATEGORY_NOT_SPACE:
1627 case SRE_CATEGORY_WORD:
1628 case SRE_CATEGORY_NOT_WORD:
1629 case SRE_CATEGORY_LINEBREAK:
1630 case SRE_CATEGORY_NOT_LINEBREAK:
1631 case SRE_CATEGORY_LOC_WORD:
1632 case SRE_CATEGORY_LOC_NOT_WORD:
1633 case SRE_CATEGORY_UNI_DIGIT:
1634 case SRE_CATEGORY_UNI_NOT_DIGIT:
1635 case SRE_CATEGORY_UNI_SPACE:
1636 case SRE_CATEGORY_UNI_NOT_SPACE:
1637 case SRE_CATEGORY_UNI_WORD:
1638 case SRE_CATEGORY_UNI_NOT_WORD:
1639 case SRE_CATEGORY_UNI_LINEBREAK:
1640 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1641 break;
1642 default:
1643 FAIL;
1644 }
1645 break;
1646
1647 default:
1648 FAIL;
1649
1650 }
1651 }
1652
1653 return 1;
1654}
1655
1656static int
1657_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1658{
1659 /* Some variables are manipulated by the macros above */
1660 SRE_CODE op;
1661 SRE_CODE arg;
1662 SRE_CODE skip;
1663
1664 VTRACE(("code=%p, end=%p\n", code, end));
1665
1666 if (code > end)
1667 FAIL;
1668
1669 while (code < end) {
1670 GET_OP;
1671 switch (op) {
1672
1673 case SRE_OP_MARK:
1674 /* We don't check whether marks are properly nested; the
1675 sre_match() code is robust even if they don't, and the worst
1676 you can get is nonsensical match results. */
1677 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001678 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001679 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1680 FAIL;
1681 }
1682 break;
1683
1684 case SRE_OP_LITERAL:
1685 case SRE_OP_NOT_LITERAL:
1686 case SRE_OP_LITERAL_IGNORE:
1687 case SRE_OP_NOT_LITERAL_IGNORE:
1688 GET_ARG;
1689 /* The arg is just a character, nothing to check */
1690 break;
1691
1692 case SRE_OP_SUCCESS:
1693 case SRE_OP_FAILURE:
1694 /* Nothing to check; these normally end the matching process */
1695 break;
1696
1697 case SRE_OP_AT:
1698 GET_ARG;
1699 switch (arg) {
1700 case SRE_AT_BEGINNING:
1701 case SRE_AT_BEGINNING_STRING:
1702 case SRE_AT_BEGINNING_LINE:
1703 case SRE_AT_END:
1704 case SRE_AT_END_LINE:
1705 case SRE_AT_END_STRING:
1706 case SRE_AT_BOUNDARY:
1707 case SRE_AT_NON_BOUNDARY:
1708 case SRE_AT_LOC_BOUNDARY:
1709 case SRE_AT_LOC_NON_BOUNDARY:
1710 case SRE_AT_UNI_BOUNDARY:
1711 case SRE_AT_UNI_NON_BOUNDARY:
1712 break;
1713 default:
1714 FAIL;
1715 }
1716 break;
1717
1718 case SRE_OP_ANY:
1719 case SRE_OP_ANY_ALL:
1720 /* These have no operands */
1721 break;
1722
1723 case SRE_OP_IN:
1724 case SRE_OP_IN_IGNORE:
1725 GET_SKIP;
1726 /* Stop 1 before the end; we check the FAILURE below */
1727 if (!_validate_charset(code, code+skip-2))
1728 FAIL;
1729 if (code[skip-2] != SRE_OP_FAILURE)
1730 FAIL;
1731 code += skip-1;
1732 break;
1733
1734 case SRE_OP_INFO:
1735 {
1736 /* A minimal info field is
1737 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1738 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1739 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001740 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001741 SRE_CODE *newcode;
1742 GET_SKIP;
1743 newcode = code+skip-1;
1744 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001745 GET_ARG;
1746 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001747 /* Check that only valid flags are present */
1748 if ((flags & ~(SRE_INFO_PREFIX |
1749 SRE_INFO_LITERAL |
1750 SRE_INFO_CHARSET)) != 0)
1751 FAIL;
1752 /* PREFIX and CHARSET are mutually exclusive */
1753 if ((flags & SRE_INFO_PREFIX) &&
1754 (flags & SRE_INFO_CHARSET))
1755 FAIL;
1756 /* LITERAL implies PREFIX */
1757 if ((flags & SRE_INFO_LITERAL) &&
1758 !(flags & SRE_INFO_PREFIX))
1759 FAIL;
1760 /* Validate the prefix */
1761 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001762 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001763 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001764 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001765 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001766 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001767 FAIL;
1768 code += prefix_len;
1769 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001770 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001771 FAIL;
1772 /* Each overlap value should be < prefix_len */
1773 for (i = 0; i < prefix_len; i++) {
1774 if (code[i] >= prefix_len)
1775 FAIL;
1776 }
1777 code += prefix_len;
1778 }
1779 /* Validate the charset */
1780 if (flags & SRE_INFO_CHARSET) {
1781 if (!_validate_charset(code, newcode-1))
1782 FAIL;
1783 if (newcode[-1] != SRE_OP_FAILURE)
1784 FAIL;
1785 code = newcode;
1786 }
1787 else if (code != newcode) {
1788 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1789 FAIL;
1790 }
1791 }
1792 break;
1793
1794 case SRE_OP_BRANCH:
1795 {
1796 SRE_CODE *target = NULL;
1797 for (;;) {
1798 GET_SKIP;
1799 if (skip == 0)
1800 break;
1801 /* Stop 2 before the end; we check the JUMP below */
1802 if (!_validate_inner(code, code+skip-3, groups))
1803 FAIL;
1804 code += skip-3;
1805 /* Check that it ends with a JUMP, and that each JUMP
1806 has the same target */
1807 GET_OP;
1808 if (op != SRE_OP_JUMP)
1809 FAIL;
1810 GET_SKIP;
1811 if (target == NULL)
1812 target = code+skip-1;
1813 else if (code+skip-1 != target)
1814 FAIL;
1815 }
1816 }
1817 break;
1818
1819 case SRE_OP_REPEAT_ONE:
1820 case SRE_OP_MIN_REPEAT_ONE:
1821 {
1822 SRE_CODE min, max;
1823 GET_SKIP;
1824 GET_ARG; min = arg;
1825 GET_ARG; max = arg;
1826 if (min > max)
1827 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001828 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001829 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001830 if (!_validate_inner(code, code+skip-4, groups))
1831 FAIL;
1832 code += skip-4;
1833 GET_OP;
1834 if (op != SRE_OP_SUCCESS)
1835 FAIL;
1836 }
1837 break;
1838
1839 case SRE_OP_REPEAT:
1840 {
1841 SRE_CODE min, max;
1842 GET_SKIP;
1843 GET_ARG; min = arg;
1844 GET_ARG; max = arg;
1845 if (min > max)
1846 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001847 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001848 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001849 if (!_validate_inner(code, code+skip-3, groups))
1850 FAIL;
1851 code += skip-3;
1852 GET_OP;
1853 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1854 FAIL;
1855 }
1856 break;
1857
1858 case SRE_OP_GROUPREF:
1859 case SRE_OP_GROUPREF_IGNORE:
1860 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001861 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001862 FAIL;
1863 break;
1864
1865 case SRE_OP_GROUPREF_EXISTS:
1866 /* The regex syntax for this is: '(?(group)then|else)', where
1867 'group' is either an integer group number or a group name,
1868 'then' and 'else' are sub-regexes, and 'else' is optional. */
1869 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001870 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001871 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001872 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001873 code--; /* The skip is relative to the first arg! */
1874 /* There are two possibilities here: if there is both a 'then'
1875 part and an 'else' part, the generated code looks like:
1876
1877 GROUPREF_EXISTS
1878 <group>
1879 <skipyes>
1880 ...then part...
1881 JUMP
1882 <skipno>
1883 (<skipyes> jumps here)
1884 ...else part...
1885 (<skipno> jumps here)
1886
1887 If there is only a 'then' part, it looks like:
1888
1889 GROUPREF_EXISTS
1890 <group>
1891 <skip>
1892 ...then part...
1893 (<skip> jumps here)
1894
1895 There is no direct way to decide which it is, and we don't want
1896 to allow arbitrary jumps anywhere in the code; so we just look
1897 for a JUMP opcode preceding our skip target.
1898 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001899 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001900 code[skip-3] == SRE_OP_JUMP)
1901 {
1902 VTRACE(("both then and else parts present\n"));
1903 if (!_validate_inner(code+1, code+skip-3, groups))
1904 FAIL;
1905 code += skip-2; /* Position after JUMP, at <skipno> */
1906 GET_SKIP;
1907 if (!_validate_inner(code, code+skip-1, groups))
1908 FAIL;
1909 code += skip-1;
1910 }
1911 else {
1912 VTRACE(("only a then part present\n"));
1913 if (!_validate_inner(code+1, code+skip-1, groups))
1914 FAIL;
1915 code += skip-1;
1916 }
1917 break;
1918
1919 case SRE_OP_ASSERT:
1920 case SRE_OP_ASSERT_NOT:
1921 GET_SKIP;
1922 GET_ARG; /* 0 for lookahead, width for lookbehind */
1923 code--; /* Back up over arg to simplify math below */
1924 if (arg & 0x80000000)
1925 FAIL; /* Width too large */
1926 /* Stop 1 before the end; we check the SUCCESS below */
1927 if (!_validate_inner(code+1, code+skip-2, groups))
1928 FAIL;
1929 code += skip-2;
1930 GET_OP;
1931 if (op != SRE_OP_SUCCESS)
1932 FAIL;
1933 break;
1934
1935 default:
1936 FAIL;
1937
1938 }
1939 }
1940
1941 VTRACE(("okay\n"));
1942 return 1;
1943}
1944
1945static int
1946_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1947{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001948 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1949 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001950 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001951 return _validate_inner(code, end-1, groups);
1952}
1953
1954static int
1955_validate(PatternObject *self)
1956{
1957 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1958 {
1959 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1960 return 0;
1961 }
1962 else
1963 VTRACE(("Success!\n"));
1964 return 1;
1965}
1966
1967/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001968/* match methods */
1969
1970static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001971match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001972{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001973 Py_XDECREF(self->regs);
1974 Py_XDECREF(self->string);
1975 Py_DECREF(self->pattern);
1976 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001977}
1978
1979static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001980match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001981{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001982 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001983 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001984 Py_buffer view;
1985 PyObject *result;
1986 void* ptr;
1987
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001988 if (index < 0 || index >= self->groups) {
1989 /* raise IndexError if we were given a bad group number */
1990 PyErr_SetString(
1991 PyExc_IndexError,
1992 "no such group"
1993 );
1994 return NULL;
1995 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001996
Fredrik Lundh6f013982000-07-03 18:44:21 +00001997 index *= 2;
1998
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001999 if (self->string == Py_None || self->mark[index] < 0) {
2000 /* return default value if the string or group is undefined */
2001 Py_INCREF(def);
2002 return def;
2003 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002004
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002005 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03002006 if (ptr == NULL)
2007 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002008 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03002009 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002010 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002011 PyBuffer_Release(&view);
2012 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002013}
2014
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002015static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002016match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002017{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002018 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002019
Guido van Rossumddefaf32007-01-14 03:31:43 +00002020 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002021 /* Default value */
2022 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002023
Christian Heimes217cfd12007-12-02 14:31:20 +00002024 if (PyLong_Check(index))
2025 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002026
Fredrik Lundh6f013982000-07-03 18:44:21 +00002027 i = -1;
2028
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002029 if (self->pattern->groupindex) {
2030 index = PyObject_GetItem(self->pattern->groupindex, index);
2031 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002032 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002033 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002034 Py_DECREF(index);
2035 } else
2036 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002037 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002038
2039 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002040}
2041
2042static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002043match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002044{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002045 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002046}
2047
2048static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002049match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002050{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002051 /* delegate to Python code */
2052 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002053 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002054 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002055 );
2056}
2057
2058static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002059match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002060{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002061 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002062 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002063
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002064 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002065
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002066 switch (size) {
2067 case 0:
2068 result = match_getslice(self, Py_False, Py_None);
2069 break;
2070 case 1:
2071 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2072 break;
2073 default:
2074 /* fetch multiple items */
2075 result = PyTuple_New(size);
2076 if (!result)
2077 return NULL;
2078 for (i = 0; i < size; i++) {
2079 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002080 self, PyTuple_GET_ITEM(args, i), Py_None
2081 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002082 if (!item) {
2083 Py_DECREF(result);
2084 return NULL;
2085 }
2086 PyTuple_SET_ITEM(result, i, item);
2087 }
2088 break;
2089 }
2090 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002091}
2092
2093static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002094match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002095{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002096 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002097 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002098
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002099 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002100 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00002101 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002102 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002103
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002104 result = PyTuple_New(self->groups-1);
2105 if (!result)
2106 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002107
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002108 for (index = 1; index < self->groups; index++) {
2109 PyObject* item;
2110 item = match_getslice_by_index(self, index, def);
2111 if (!item) {
2112 Py_DECREF(result);
2113 return NULL;
2114 }
2115 PyTuple_SET_ITEM(result, index-1, item);
2116 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002117
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002118 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002119}
2120
2121static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002122match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002123{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002124 PyObject* result;
2125 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002126 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002127
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002128 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002129 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002130 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002131 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002132
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002133 result = PyDict_New();
2134 if (!result || !self->pattern->groupindex)
2135 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002136
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002137 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002138 if (!keys)
2139 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002140
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002141 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002142 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002143 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002144 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002145 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002146 if (!key)
2147 goto failed;
2148 value = match_getslice(self, key, def);
2149 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002150 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002151 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002152 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002153 status = PyDict_SetItem(result, key, value);
2154 Py_DECREF(value);
2155 if (status < 0)
2156 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002157 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002159 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002160
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002161 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002162
2163failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002164 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002165 Py_DECREF(result);
2166 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002167}
2168
2169static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002170match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002171{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002172 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002173
Guido van Rossumddefaf32007-01-14 03:31:43 +00002174 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002175 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002176 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002177
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002178 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002179
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002180 if (index < 0 || index >= self->groups) {
2181 PyErr_SetString(
2182 PyExc_IndexError,
2183 "no such group"
2184 );
2185 return NULL;
2186 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002187
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002188 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002189 return PyLong_FromSsize_t(self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002190}
2191
2192static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002193match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002194{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002195 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002196
Guido van Rossumddefaf32007-01-14 03:31:43 +00002197 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002198 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002199 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002200
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002201 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002202
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002203 if (index < 0 || index >= self->groups) {
2204 PyErr_SetString(
2205 PyExc_IndexError,
2206 "no such group"
2207 );
2208 return NULL;
2209 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002210
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002211 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002212 return PyLong_FromSsize_t(self->mark[index*2+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002213}
2214
2215LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002216_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002217{
2218 PyObject* pair;
2219 PyObject* item;
2220
2221 pair = PyTuple_New(2);
2222 if (!pair)
2223 return NULL;
2224
Christian Heimes217cfd12007-12-02 14:31:20 +00002225 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002226 if (!item)
2227 goto error;
2228 PyTuple_SET_ITEM(pair, 0, item);
2229
Christian Heimes217cfd12007-12-02 14:31:20 +00002230 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002231 if (!item)
2232 goto error;
2233 PyTuple_SET_ITEM(pair, 1, item);
2234
2235 return pair;
2236
2237 error:
2238 Py_DECREF(pair);
2239 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002240}
2241
2242static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002243match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002244{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002245 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002246
Guido van Rossumddefaf32007-01-14 03:31:43 +00002247 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002248 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002249 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002250
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002251 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002252
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002253 if (index < 0 || index >= self->groups) {
2254 PyErr_SetString(
2255 PyExc_IndexError,
2256 "no such group"
2257 );
2258 return NULL;
2259 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002260
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002261 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002262 return _pair(self->mark[index*2], self->mark[index*2+1]);
2263}
2264
2265static PyObject*
2266match_regs(MatchObject* self)
2267{
2268 PyObject* regs;
2269 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002270 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002271
2272 regs = PyTuple_New(self->groups);
2273 if (!regs)
2274 return NULL;
2275
2276 for (index = 0; index < self->groups; index++) {
2277 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2278 if (!item) {
2279 Py_DECREF(regs);
2280 return NULL;
2281 }
2282 PyTuple_SET_ITEM(regs, index, item);
2283 }
2284
2285 Py_INCREF(regs);
2286 self->regs = regs;
2287
2288 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002289}
2290
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002291static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002292match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002293{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002294#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002295 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002296 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002297
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002298 slots = 2 * (self->pattern->groups+1);
2299
2300 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2301 if (!copy)
2302 return NULL;
2303
2304 /* this value a constant, but any compiler should be able to
2305 figure that out all by itself */
2306 offset = offsetof(MatchObject, string);
2307
2308 Py_XINCREF(self->pattern);
2309 Py_XINCREF(self->string);
2310 Py_XINCREF(self->regs);
2311
2312 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002313 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002314
2315 return (PyObject*) copy;
2316#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002317 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002318 return NULL;
2319#endif
2320}
2321
2322static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002323match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002324{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002325#ifdef USE_BUILTIN_COPY
2326 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002327
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002328 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002329 if (!copy)
2330 return NULL;
2331
2332 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2333 !deepcopy(&copy->string, memo) ||
2334 !deepcopy(&copy->regs, memo)) {
2335 Py_DECREF(copy);
2336 return NULL;
2337 }
2338
2339#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002340 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2341 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002342#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002343}
2344
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002345PyDoc_STRVAR(match_doc,
2346"The result of re.match() and re.search().\n\
2347Match objects always have a boolean value of True.");
2348
2349PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002350"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002351 Return subgroup(s) of the match by indices or names.\n\
2352 For 0 returns the entire match.");
2353
2354PyDoc_STRVAR(match_start_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002355"start([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002356 Return index of the start of the substring matched by group.");
2357
2358PyDoc_STRVAR(match_end_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002359"end([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002360 Return index of the end of the substring matched by group.");
2361
2362PyDoc_STRVAR(match_span_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002363"span([group]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002364 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
2365
2366PyDoc_STRVAR(match_groups_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002367"groups([default=None]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002368 Return a tuple containing all the subgroups of the match, from 1.\n\
2369 The default argument is used for groups\n\
2370 that did not participate in the match");
2371
2372PyDoc_STRVAR(match_groupdict_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002373"groupdict([default=None]) -> dict.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002374 Return a dictionary containing all the named subgroups of the match,\n\
2375 keyed by the subgroup name. The default argument is used for groups\n\
2376 that did not participate in the match");
2377
2378PyDoc_STRVAR(match_expand_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002379"expand(template) -> str.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002380 Return the string obtained by doing backslash substitution\n\
2381 on the string template, as done by the sub() method.");
2382
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002383static PyMethodDef match_methods[] = {
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002384 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2385 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
2386 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
2387 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
2388 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
2389 match_groups_doc},
2390 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
2391 match_groupdict_doc},
2392 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002393 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
2394 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002395 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002396};
2397
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002398static PyObject *
2399match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002400{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002401 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002402 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002403 Py_INCREF(Py_None);
2404 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002405}
2406
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002407static PyObject *
2408match_lastgroup_get(MatchObject *self)
2409{
2410 if (self->pattern->indexgroup && self->lastindex >= 0) {
2411 PyObject* result = PySequence_GetItem(
2412 self->pattern->indexgroup, self->lastindex
2413 );
2414 if (result)
2415 return result;
2416 PyErr_Clear();
2417 }
2418 Py_INCREF(Py_None);
2419 return Py_None;
2420}
2421
2422static PyObject *
2423match_regs_get(MatchObject *self)
2424{
2425 if (self->regs) {
2426 Py_INCREF(self->regs);
2427 return self->regs;
2428 } else
2429 return match_regs(self);
2430}
2431
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002432static PyObject *
2433match_repr(MatchObject *self)
2434{
2435 PyObject *result;
2436 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2437 if (group0 == NULL)
2438 return NULL;
2439 result = PyUnicode_FromFormat(
2440 "<%s object; span=(%d, %d), match=%.50R>",
2441 Py_TYPE(self)->tp_name,
2442 self->mark[0], self->mark[1], group0);
2443 Py_DECREF(group0);
2444 return result;
2445}
2446
2447
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002448static PyGetSetDef match_getset[] = {
2449 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2450 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2451 {"regs", (getter)match_regs_get, (setter)NULL},
2452 {NULL}
2453};
2454
2455#define MATCH_OFF(x) offsetof(MatchObject, x)
2456static PyMemberDef match_members[] = {
2457 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2458 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2459 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2460 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2461 {NULL}
2462};
2463
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2466
Neal Norwitz57c179c2006-03-22 07:18:02 +00002467static PyTypeObject Match_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002468 PyVarObject_HEAD_INIT(NULL,0)
2469 "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002470 sizeof(MatchObject), sizeof(Py_ssize_t),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002471 (destructor)match_dealloc, /* tp_dealloc */
2472 0, /* tp_print */
2473 0, /* tp_getattr */
2474 0, /* tp_setattr */
2475 0, /* tp_reserved */
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002476 (reprfunc)match_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002477 0, /* tp_as_number */
2478 0, /* tp_as_sequence */
2479 0, /* tp_as_mapping */
2480 0, /* tp_hash */
2481 0, /* tp_call */
2482 0, /* tp_str */
2483 0, /* tp_getattro */
2484 0, /* tp_setattro */
2485 0, /* tp_as_buffer */
2486 Py_TPFLAGS_DEFAULT, /* tp_flags */
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002487 match_doc, /* tp_doc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002488 0, /* tp_traverse */
2489 0, /* tp_clear */
2490 0, /* tp_richcompare */
2491 0, /* tp_weaklistoffset */
2492 0, /* tp_iter */
2493 0, /* tp_iternext */
2494 match_methods, /* tp_methods */
2495 match_members, /* tp_members */
2496 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00002497};
2498
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002499static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002500pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002501{
2502 /* create match object (from state object) */
2503
2504 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002505 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002506 char* base;
2507 int n;
2508
2509 if (status > 0) {
2510
2511 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002512 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002513 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2514 2*(pattern->groups+1));
2515 if (!match)
2516 return NULL;
2517
2518 Py_INCREF(pattern);
2519 match->pattern = pattern;
2520
2521 Py_INCREF(state->string);
2522 match->string = state->string;
2523
2524 match->regs = NULL;
2525 match->groups = pattern->groups+1;
2526
2527 /* fill in group slices */
2528
2529 base = (char*) state->beginning;
2530 n = state->charsize;
2531
2532 match->mark[0] = ((char*) state->start - base) / n;
2533 match->mark[1] = ((char*) state->ptr - base) / n;
2534
2535 for (i = j = 0; i < pattern->groups; i++, j+=2)
2536 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2537 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2538 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2539 } else
2540 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2541
2542 match->pos = state->pos;
2543 match->endpos = state->endpos;
2544
2545 match->lastindex = state->lastindex;
2546
2547 return (PyObject*) match;
2548
2549 } else if (status == 0) {
2550
2551 /* no match */
2552 Py_INCREF(Py_None);
2553 return Py_None;
2554
2555 }
2556
2557 /* internal error */
2558 pattern_error(status);
2559 return NULL;
2560}
2561
2562
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002563/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002564/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002565
2566static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002567scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002568{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002569 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002570 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002571 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002572}
2573
2574static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002575scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002576{
2577 SRE_STATE* state = &self->state;
2578 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002579 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002580
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002581 state_reset(state);
2582
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002583 state->ptr = state->start;
2584
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002585 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002586 if (PyErr_Occurred())
2587 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002588
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002589 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002590 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002591
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002592 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002593 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002594 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002595 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002596
2597 return match;
2598}
2599
2600
2601static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002602scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002603{
2604 SRE_STATE* state = &self->state;
2605 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002606 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002607
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002608 state_reset(state);
2609
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002610 state->ptr = state->start;
2611
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002612 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002613 if (PyErr_Occurred())
2614 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002615
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002616 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002617 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002618
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002619 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002620 state->start = (void*) ((char*) state->ptr + state->charsize);
2621 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002622 state->start = state->ptr;
2623
2624 return match;
2625}
2626
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002627static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002628 {"match", (PyCFunction) scanner_match, METH_NOARGS},
2629 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002630 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002631};
2632
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002633#define SCAN_OFF(x) offsetof(ScannerObject, x)
2634static PyMemberDef scanner_members[] = {
Ezio Melotti7c8c1ea2011-09-29 01:00:19 +03002635 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002636 {NULL} /* Sentinel */
2637};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002638
Neal Norwitz57c179c2006-03-22 07:18:02 +00002639static PyTypeObject Scanner_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002640 PyVarObject_HEAD_INIT(NULL, 0)
2641 "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002642 sizeof(ScannerObject), 0,
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002643 (destructor)scanner_dealloc,/* tp_dealloc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002644 0, /* tp_print */
2645 0, /* tp_getattr */
2646 0, /* tp_setattr */
2647 0, /* tp_reserved */
2648 0, /* tp_repr */
2649 0, /* tp_as_number */
2650 0, /* tp_as_sequence */
2651 0, /* tp_as_mapping */
2652 0, /* tp_hash */
2653 0, /* tp_call */
2654 0, /* tp_str */
2655 0, /* tp_getattro */
2656 0, /* tp_setattro */
2657 0, /* tp_as_buffer */
2658 Py_TPFLAGS_DEFAULT, /* tp_flags */
2659 0, /* tp_doc */
2660 0, /* tp_traverse */
2661 0, /* tp_clear */
2662 0, /* tp_richcompare */
2663 0, /* tp_weaklistoffset */
2664 0, /* tp_iter */
2665 0, /* tp_iternext */
2666 scanner_methods, /* tp_methods */
2667 scanner_members, /* tp_members */
2668 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002669};
2670
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002671static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002672pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kw)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002673{
2674 /* create search state object */
2675
2676 ScannerObject* self;
2677
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02002678 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002679 Py_ssize_t start = 0;
2680 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02002681 static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
2682 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:scanner", kwlist,
2683 &string, &start, &end, &string2))
2684 return NULL;
2685
2686 string = fix_string_param(string, string2, "source");
2687 if (!string)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002688 return NULL;
2689
2690 /* create scanner object */
2691 self = PyObject_NEW(ScannerObject, &Scanner_Type);
2692 if (!self)
2693 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002694 self->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002695
2696 string = state_init(&self->state, pattern, string, start, end);
2697 if (!string) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002698 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002699 return NULL;
2700 }
2701
2702 Py_INCREF(pattern);
2703 self->pattern = (PyObject*) pattern;
2704
2705 return (PyObject*) self;
2706}
2707
Guido van Rossumb700df92000-03-31 14:59:30 +00002708static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002709 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002710 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00002711 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002712 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002713};
2714
Martin v. Löwis1a214512008-06-11 05:26:20 +00002715static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002716 PyModuleDef_HEAD_INIT,
2717 "_" SRE_MODULE,
2718 NULL,
2719 -1,
2720 _functions,
2721 NULL,
2722 NULL,
2723 NULL,
2724 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002725};
2726
2727PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002728{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002729 PyObject* m;
2730 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002731 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002732
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002733 /* Patch object types */
2734 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2735 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002736 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002737
Martin v. Löwis1a214512008-06-11 05:26:20 +00002738 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002739 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002740 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002741 d = PyModule_GetDict(m);
2742
Christian Heimes217cfd12007-12-02 14:31:20 +00002743 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002744 if (x) {
2745 PyDict_SetItemString(d, "MAGIC", x);
2746 Py_DECREF(x);
2747 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002748
Christian Heimes217cfd12007-12-02 14:31:20 +00002749 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002750 if (x) {
2751 PyDict_SetItemString(d, "CODESIZE", x);
2752 Py_DECREF(x);
2753 }
2754
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002755 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2756 if (x) {
2757 PyDict_SetItemString(d, "MAXREPEAT", x);
2758 Py_DECREF(x);
2759 }
2760
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002761 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2762 if (x) {
2763 PyDict_SetItemString(d, "MAXGROUPS", x);
2764 Py_DECREF(x);
2765 }
2766
Neal Norwitzfe537132007-08-26 03:55:15 +00002767 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002768 if (x) {
2769 PyDict_SetItemString(d, "copyright", x);
2770 Py_DECREF(x);
2771 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002772 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002773}
2774
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002775/* vim:ts=4:sw=4:et
2776*/