blob: 3a92db92d553c838fbf39ee43dc98e136f9dcc11 [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Georg Brandldaa1fa92013-10-13 09:32:59 +02007 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
Guido van Rossumb700df92000-03-31 14:59:30 +000025 *
Fredrik Lundh770617b2001-01-14 15:06:11 +000026 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
Guido van Rossumb700df92000-03-31 14:59:30 +000027 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000028 * This version of the SRE library can be redistributed under CNRI's
29 * Python 1.6 license. For any other use, please contact Secret Labs
30 * AB (info@pythonware.com).
31 *
Guido van Rossumb700df92000-03-31 14:59:30 +000032 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000033 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000034 * other compatibility work.
35 */
36
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000037static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000038 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000039
Thomas Wouters0e3f5912006-08-11 14:57:12 +000040#define PY_SSIZE_T_CLEAN
41
Guido van Rossumb700df92000-03-31 14:59:30 +000042#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000043#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000044
45#include "sre.h"
46
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030047#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
48
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000049#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000050
Fredrik Lundh436c3d582000-06-29 08:58:44 +000051/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000052#if !defined(SRE_MODULE)
53#define SRE_MODULE "sre"
54#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000055
Thomas Wouters9ada3d62006-04-21 09:47:09 +000056#define SRE_PY_MODULE "re"
57
Guido van Rossumb700df92000-03-31 14:59:30 +000058/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000059#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000060
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000061/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000062/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063
64/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000065#define USE_FAST_SEARCH
66
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000067/* enables copy/deepcopy handling (work in progress) */
68#undef USE_BUILTIN_COPY
69
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000070/* -------------------------------------------------------------------- */
71
Fredrik Lundh80946112000-06-29 18:03:25 +000072#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000073#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000074#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000075/* fastest possible local call under MSVC */
76#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000077#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000078#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000079#else
80#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000081#endif
82
83/* error codes */
84#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000085#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000086#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000087#define SRE_ERROR_MEMORY -9 /* out of memory */
Christian Heimes2380ac72008-01-09 00:17:24 +000088#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000089
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000090#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000091#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000092#else
93#define TRACE(v)
94#endif
95
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000096/* -------------------------------------------------------------------- */
97/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000098
Fredrik Lundh436c3d582000-06-29 08:58:44 +000099/* default character predicates (run sre_chars.py to regenerate tables) */
100
101#define SRE_DIGIT_MASK 1
102#define SRE_SPACE_MASK 2
103#define SRE_LINEBREAK_MASK 4
104#define SRE_ALNUM_MASK 8
105#define SRE_WORD_MASK 16
106
Fredrik Lundh21009b92001-09-18 18:47:09 +0000107/* FIXME: this assumes ASCII. create tables in init_sre() instead */
108
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000109static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
1102, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
1110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
11225, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
11324, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
1140, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
11524, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
116
Fredrik Lundhb389df32000-06-29 12:48:37 +0000117static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
Fredrik Lundh436c3d582000-06-29 08:58:44 +000011810, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
11927, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
12044, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
12161, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
122108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
123122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
124106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
125120, 121, 122, 123, 124, 125, 126, 127 };
126
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000127#define SRE_IS_DIGIT(ch)\
128 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
129#define SRE_IS_SPACE(ch)\
130 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
131#define SRE_IS_LINEBREAK(ch)\
132 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
133#define SRE_IS_ALNUM(ch)\
134 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
135#define SRE_IS_WORD(ch)\
136 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000137
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000138static unsigned int sre_lower(unsigned int ch)
139{
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000140 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000141}
142
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000143/* locale-specific character predicates */
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000144/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
145 * warnings when c's type supports only numbers < N+1 */
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000146#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000147#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
148
/* Lower-case a code point with the C library's tolower().  Values of
   256 and above are outside the range handed to tolower() and are
   returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256)
        return (unsigned int)tolower(ch);
    return ch;
}
153
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000154/* unicode-specific character predicates */
155
Victor Stinner0058b862011-09-29 03:27:47 +0200156#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
157#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
158#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
159#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
160#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000161
/* Lower-case a code point via Py_UNICODE_TOLOWER and return the result
   as an unsigned int. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
166
Guido van Rossumb700df92000-03-31 14:59:30 +0000167LOCAL(int)
168sre_category(SRE_CODE category, unsigned int ch)
169{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000170 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000171
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000172 case SRE_CATEGORY_DIGIT:
173 return SRE_IS_DIGIT(ch);
174 case SRE_CATEGORY_NOT_DIGIT:
175 return !SRE_IS_DIGIT(ch);
176 case SRE_CATEGORY_SPACE:
177 return SRE_IS_SPACE(ch);
178 case SRE_CATEGORY_NOT_SPACE:
179 return !SRE_IS_SPACE(ch);
180 case SRE_CATEGORY_WORD:
181 return SRE_IS_WORD(ch);
182 case SRE_CATEGORY_NOT_WORD:
183 return !SRE_IS_WORD(ch);
184 case SRE_CATEGORY_LINEBREAK:
185 return SRE_IS_LINEBREAK(ch);
186 case SRE_CATEGORY_NOT_LINEBREAK:
187 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000188
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000189 case SRE_CATEGORY_LOC_WORD:
190 return SRE_LOC_IS_WORD(ch);
191 case SRE_CATEGORY_LOC_NOT_WORD:
192 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000193
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000194 case SRE_CATEGORY_UNI_DIGIT:
195 return SRE_UNI_IS_DIGIT(ch);
196 case SRE_CATEGORY_UNI_NOT_DIGIT:
197 return !SRE_UNI_IS_DIGIT(ch);
198 case SRE_CATEGORY_UNI_SPACE:
199 return SRE_UNI_IS_SPACE(ch);
200 case SRE_CATEGORY_UNI_NOT_SPACE:
201 return !SRE_UNI_IS_SPACE(ch);
202 case SRE_CATEGORY_UNI_WORD:
203 return SRE_UNI_IS_WORD(ch);
204 case SRE_CATEGORY_UNI_NOT_WORD:
205 return !SRE_UNI_IS_WORD(ch);
206 case SRE_CATEGORY_UNI_LINEBREAK:
207 return SRE_UNI_IS_LINEBREAK(ch);
208 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
209 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000210 }
211 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000212}
213
214/* helpers */
215
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000217data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000218{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000219 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000220 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000221 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000222 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000223 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000224}
225
226static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000227data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000228{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000229 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000230 minsize = state->data_stack_base+size;
231 cursize = state->data_stack_size;
232 if (cursize < minsize) {
233 void* stack;
234 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300235 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000236 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000237 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000238 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000239 return SRE_ERROR_MEMORY;
240 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000241 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000242 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000243 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000244 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000245}
246
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000247/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000248
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300249#define SRE_CHAR Py_UCS1
250#define SIZEOF_SRE_CHAR 1
251#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300252#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000253
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300254/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000255
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300256#define SRE_CHAR Py_UCS2
257#define SIZEOF_SRE_CHAR 2
258#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300259#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000260
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300261/* generate 32-bit unicode version */
262
263#define SRE_CHAR Py_UCS4
264#define SIZEOF_SRE_CHAR 4
265#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300266#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000267
268/* -------------------------------------------------------------------- */
269/* factories and destructors */
270
271/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100272static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600273static PyObject*pattern_scanner(PatternObject*, PyObject*, PyObject* kw);
Guido van Rossumb700df92000-03-31 14:59:30 +0000274
275static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000276sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +0000277{
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100278 return PyLong_FromSize_t(sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +0000279}
280
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000281static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +0000282sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000283{
284 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000285 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000286 return NULL;
287 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000288 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000289 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000290 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundhb389df32000-06-29 12:48:37 +0000291 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000292}
293
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000294LOCAL(void)
295state_reset(SRE_STATE* state)
296{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000297 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000298 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000299
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000300 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000301 state->lastindex = -1;
302
303 state->repeat = NULL;
304
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000305 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000306}
307
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000308static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200309getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300310 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600311 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000312{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000313 /* given a python object, return a data pointer, a length (in
314 characters), and a character size. return NULL if the object
315 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000316
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000317 /* Unicode objects do not support the buffer API. So, get the data
318 directly instead. */
319 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200320 if (PyUnicode_READY(string) == -1)
321 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200323 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300324 *p_isbytes = 0;
325 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000326 }
327
Victor Stinner0058b862011-09-29 03:27:47 +0200328 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300329 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
330 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
331 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000332 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000333
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300334 *p_length = view->len;
335 *p_charsize = 1;
336 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000337
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300338 if (view->buf == NULL) {
339 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
340 PyBuffer_Release(view);
341 view->buf = NULL;
342 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000343 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000345}
346
347LOCAL(PyObject*)
348state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000349 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000350{
351 /* prepare state object */
352
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000353 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300354 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000355 void* ptr;
356
357 memset(state, 0, sizeof(SRE_STATE));
358
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000359 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000360 state->lastindex = -1;
361
Benjamin Petersone48944b2012-03-07 14:50:25 -0600362 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300363 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000364 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600365 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000366
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300367 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600368 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300369 "can't use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600370 goto err;
371 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300372 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600373 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300374 "can't use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600375 goto err;
376 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000377
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000378 /* adjust boundaries */
379 if (start < 0)
380 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000381 else if (start > length)
382 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000383
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000384 if (end < 0)
385 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000386 else if (end > length)
387 end = length;
388
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300389 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000390 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000391
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000392 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000393
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000394 state->start = (void*) ((char*) ptr + start * state->charsize);
395 state->end = (void*) ((char*) ptr + end * state->charsize);
396
397 Py_INCREF(string);
398 state->string = string;
399 state->pos = start;
400 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000401
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000402 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000403 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000404 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000405 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000406 else
Fredrik Lundhb389df32000-06-29 12:48:37 +0000407 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600410 err:
411 if (state->buffer.buf)
412 PyBuffer_Release(&state->buffer);
413 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000414}
415
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000416LOCAL(void)
417state_fini(SRE_STATE* state)
418{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600419 if (state->buffer.buf)
420 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000421 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000422 data_stack_dealloc(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000423}
424
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000425/* calculate offset from start of string */
426#define STATE_OFFSET(state, member)\
427 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
428
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000429LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300430getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300431 PyObject* string, Py_ssize_t start, Py_ssize_t end)
432{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300433 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300434 if (PyBytes_CheckExact(string) &&
435 start == 0 && end == PyBytes_GET_SIZE(string)) {
436 Py_INCREF(string);
437 return string;
438 }
439 return PyBytes_FromStringAndSize(
440 (const char *)ptr + start, end - start);
441 }
442 else {
443 return PyUnicode_Substring(string, start, end);
444 }
445}
446
447LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000448state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000449{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000450 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000451
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000452 index = (index - 1) * 2;
453
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000454 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000455 if (empty)
456 /* want empty string */
457 i = j = 0;
458 else {
459 Py_INCREF(Py_None);
460 return Py_None;
461 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000462 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000463 i = STATE_OFFSET(state, state->mark[index]);
464 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000465 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000466
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300467 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000468}
469
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000470static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100471pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000472{
473 switch (status) {
474 case SRE_ERROR_RECURSION_LIMIT:
475 PyErr_SetString(
476 PyExc_RuntimeError,
477 "maximum recursion limit exceeded"
478 );
479 break;
480 case SRE_ERROR_MEMORY:
481 PyErr_NoMemory();
482 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000483 case SRE_ERROR_INTERRUPTED:
484 /* An exception has already been raised, so let it fly */
485 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000486 default:
487 /* other error codes indicate compiler/engine bugs */
488 PyErr_SetString(
489 PyExc_RuntimeError,
490 "internal error in regular expression engine"
491 );
492 }
493}
494
Guido van Rossumb700df92000-03-31 14:59:30 +0000495static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000496pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000497{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000498 if (self->weakreflist != NULL)
499 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000500 Py_XDECREF(self->pattern);
501 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000502 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000503 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000504}
505
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300506LOCAL(Py_ssize_t)
507sre_match(SRE_STATE* state, SRE_CODE* pattern)
508{
509 if (state->charsize == 1)
510 return sre_ucs1_match(state, pattern);
511 if (state->charsize == 2)
512 return sre_ucs2_match(state, pattern);
513 assert(state->charsize == 4);
514 return sre_ucs4_match(state, pattern);
515}
516
517LOCAL(Py_ssize_t)
518sre_search(SRE_STATE* state, SRE_CODE* pattern)
519{
520 if (state->charsize == 1)
521 return sre_ucs1_search(state, pattern);
522 if (state->charsize == 2)
523 return sre_ucs2_search(state, pattern);
524 assert(state->charsize == 4);
525 return sre_ucs4_search(state, pattern);
526}
527
Guido van Rossumb700df92000-03-31 14:59:30 +0000528static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000529pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000530{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000531 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100532 Py_ssize_t status;
Guido van Rossumb700df92000-03-31 14:59:30 +0000533
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000534 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000535 Py_ssize_t start = 0;
536 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000537 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000538 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000539 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000540 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000541
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000542 string = state_init(&state, self, string, start, end);
543 if (!string)
544 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000545
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000546 state.ptr = state.start;
547
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000548 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
549
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300550 status = sre_match(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000551
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000552 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Thomas Wouters89f507f2006-12-13 04:49:30 +0000553 if (PyErr_Occurred())
554 return NULL;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000555
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000556 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000557
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000558 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000559}
560
561static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000562pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000563{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000564 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100565 Py_ssize_t status;
Guido van Rossumb700df92000-03-31 14:59:30 +0000566
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000567 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000568 Py_ssize_t start = 0;
569 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000570 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000571 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000572 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000573 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000574
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 string = state_init(&state, self, string, start, end);
576 if (!string)
577 return NULL;
578
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000579 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
580
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300581 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000582
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000583 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
584
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000585 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000586
Thomas Wouters89f507f2006-12-13 04:49:30 +0000587 if (PyErr_Occurred())
588 return NULL;
589
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000590 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000591}
592
593static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000594call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000595{
596 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000597 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000598 PyObject* func;
599 PyObject* result;
600
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000601 if (!args)
602 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000603 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000604 if (!name)
605 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000606 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000607 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000608 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000609 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000610 func = PyObject_GetAttrString(mod, function);
611 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000612 if (!func)
613 return NULL;
614 result = PyObject_CallObject(func, args);
615 Py_DECREF(func);
616 Py_DECREF(args);
617 return result;
618}
619
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
 *
 * `memo` is passed through as the second argument of copy.deepcopy.
 * On success the old reference held in *object is released and the new
 * one stored in its place.  A NULL *object is left untouched and counts
 * as success, since there is nothing to copy.
 *
 * Returns 1 on success, 0 on failure (*object is unchanged then).
 */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* copy;

    /* unset optional field: PyTuple_Pack() and Py_DECREF() below must
       not be handed a NULL object */
    if (!*object)
        return 1;

    copy = call(
        "copy", "deepcopy",
        PyTuple_Pack(2, *object, memo)
        );
    if (!copy)
        return 0;

    Py_DECREF(*object);
    *object = copy;

    return 1; /* success */
}
#endif
639
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000640static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000641pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000642{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000643 SRE_STATE state;
644 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100645 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000646 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000647
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000648 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000649 Py_ssize_t start = 0;
650 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000651 static char* kwlist[] = { "source", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000652 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000653 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000654 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000655
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000656 string = state_init(&state, self, string, start, end);
657 if (!string)
658 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000659
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000660 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000661 if (!list) {
662 state_fini(&state);
663 return NULL;
664 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000665
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000666 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000667
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000668 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000669
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000670 state_reset(&state);
671
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000672 state.ptr = state.start;
673
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300674 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300675 if (PyErr_Occurred())
676 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000677
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000678 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000679 if (status == 0)
680 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000681 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000682 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000683 }
Tim Peters3d563502006-01-21 02:47:53 +0000684
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000685 /* don't bother to build a match object */
686 switch (self->groups) {
687 case 0:
688 b = STATE_OFFSET(&state, state.start);
689 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300690 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300691 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000692 if (!item)
693 goto error;
694 break;
695 case 1:
696 item = state_getslice(&state, 1, string, 1);
697 if (!item)
698 goto error;
699 break;
700 default:
701 item = PyTuple_New(self->groups);
702 if (!item)
703 goto error;
704 for (i = 0; i < self->groups; i++) {
705 PyObject* o = state_getslice(&state, i+1, string, 1);
706 if (!o) {
707 Py_DECREF(item);
708 goto error;
709 }
710 PyTuple_SET_ITEM(item, i, o);
711 }
712 break;
713 }
714
715 status = PyList_Append(list, item);
716 Py_DECREF(item);
717 if (status < 0)
718 goto error;
719
720 if (state.ptr == state.start)
721 state.start = (void*) ((char*) state.ptr + state.charsize);
722 else
723 state.start = state.ptr;
724
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000725 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000726
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 state_fini(&state);
728 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000729
730error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000731 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000732 state_fini(&state);
733 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000734
Guido van Rossumb700df92000-03-31 14:59:30 +0000735}
736
Fredrik Lundh703ce812001-10-24 22:16:30 +0000737static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600738pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
Fredrik Lundh703ce812001-10-24 22:16:30 +0000739{
740 PyObject* scanner;
741 PyObject* search;
742 PyObject* iterator;
743
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600744 scanner = pattern_scanner(pattern, args, kw);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000745 if (!scanner)
746 return NULL;
747
748 search = PyObject_GetAttrString(scanner, "search");
749 Py_DECREF(scanner);
750 if (!search)
751 return NULL;
752
753 iterator = PyCallIter_New(search, Py_None);
754 Py_DECREF(search);
755
756 return iterator;
757}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000758
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000759static PyObject*
760pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
761{
762 SRE_STATE state;
763 PyObject* list;
764 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100765 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000766 Py_ssize_t n;
767 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000768 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000769
770 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000771 Py_ssize_t maxsplit = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000772 static char* kwlist[] = { "source", "maxsplit", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000773 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000774 &string, &maxsplit))
775 return NULL;
776
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000777 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000778 if (!string)
779 return NULL;
780
781 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000782 if (!list) {
783 state_fini(&state);
784 return NULL;
785 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000786
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000787 n = 0;
788 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000789
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000790 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000791
792 state_reset(&state);
793
794 state.ptr = state.start;
795
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300796 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300797 if (PyErr_Occurred())
798 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000799
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000800 if (status <= 0) {
801 if (status == 0)
802 break;
803 pattern_error(status);
804 goto error;
805 }
Tim Peters3d563502006-01-21 02:47:53 +0000806
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000807 if (state.start == state.ptr) {
808 if (last == state.end)
809 break;
810 /* skip one character */
811 state.start = (void*) ((char*) state.ptr + state.charsize);
812 continue;
813 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000814
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000815 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300816 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000817 string, STATE_OFFSET(&state, last),
818 STATE_OFFSET(&state, state.start)
819 );
820 if (!item)
821 goto error;
822 status = PyList_Append(list, item);
823 Py_DECREF(item);
824 if (status < 0)
825 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000826
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000827 /* add groups (if any) */
828 for (i = 0; i < self->groups; i++) {
829 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000830 if (!item)
831 goto error;
832 status = PyList_Append(list, item);
833 Py_DECREF(item);
834 if (status < 0)
835 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000836 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000837
838 n = n + 1;
839
840 last = state.start = state.ptr;
841
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000842 }
843
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000844 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300845 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000846 string, STATE_OFFSET(&state, last), state.endpos
847 );
848 if (!item)
849 goto error;
850 status = PyList_Append(list, item);
851 Py_DECREF(item);
852 if (status < 0)
853 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000854
855 state_fini(&state);
856 return list;
857
858error:
859 Py_DECREF(list);
860 state_fini(&state);
861 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000862
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000863}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000864
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000865static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000866pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000867 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000868{
869 SRE_STATE state;
870 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300871 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000872 PyObject* item;
873 PyObject* filter;
874 PyObject* args;
875 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000876 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100877 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000878 Py_ssize_t n;
879 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300880 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000881 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600882 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000883
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000884 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000885 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000886 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000887 Py_INCREF(filter);
888 filter_is_callable = 1;
889 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000890 /* if not callable, check if it's a literal string */
891 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600892 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300893 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200894 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000895 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300896 if (charsize == 1)
897 literal = memchr(ptr, '\\', n) == NULL;
898 else
899 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000900 } else {
901 PyErr_Clear();
902 literal = 0;
903 }
Benjamin Petersone48944b2012-03-07 14:50:25 -0600904 if (view.buf)
905 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000906 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000907 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000908 Py_INCREF(filter);
909 filter_is_callable = 0;
910 } else {
911 /* not a literal; hand it over to the template compiler */
912 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +0000913 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000914 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000915 );
916 if (!filter)
917 return NULL;
918 filter_is_callable = PyCallable_Check(filter);
919 }
Fredrik Lundhdac58492001-10-21 21:48:30 +0000920 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000921
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000922 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +0000923 if (!string) {
924 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000925 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +0000926 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000927
928 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000929 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +0000930 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000931 state_fini(&state);
932 return NULL;
933 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000934
935 n = i = 0;
936
937 while (!count || n < count) {
938
939 state_reset(&state);
940
941 state.ptr = state.start;
942
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300943 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300944 if (PyErr_Occurred())
945 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000946
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000947 if (status <= 0) {
948 if (status == 0)
949 break;
950 pattern_error(status);
951 goto error;
952 }
Tim Peters3d563502006-01-21 02:47:53 +0000953
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000954 b = STATE_OFFSET(&state, state.start);
955 e = STATE_OFFSET(&state, state.ptr);
956
957 if (i < b) {
958 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300959 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300960 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000961 if (!item)
962 goto error;
963 status = PyList_Append(list, item);
964 Py_DECREF(item);
965 if (status < 0)
966 goto error;
967
968 } else if (i == b && i == e && n > 0)
969 /* ignore empty match on latest position */
970 goto next;
971
972 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000973 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000974 match = pattern_new_match(self, &state, 1);
975 if (!match)
976 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000977 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000978 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +0000979 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000980 goto error;
981 }
982 item = PyObject_CallObject(filter, args);
983 Py_DECREF(args);
984 Py_DECREF(match);
985 if (!item)
986 goto error;
987 } else {
988 /* filter is literal string */
989 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000990 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000991 }
992
993 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000994 if (item != Py_None) {
995 status = PyList_Append(list, item);
996 Py_DECREF(item);
997 if (status < 0)
998 goto error;
999 }
Tim Peters3d563502006-01-21 02:47:53 +00001000
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001001 i = e;
1002 n = n + 1;
1003
1004next:
1005 /* move on */
1006 if (state.ptr == state.start)
1007 state.start = (void*) ((char*) state.ptr + state.charsize);
1008 else
1009 state.start = state.ptr;
1010
1011 }
1012
1013 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001014 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001015 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001016 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001017 if (!item)
1018 goto error;
1019 status = PyList_Append(list, item);
1020 Py_DECREF(item);
1021 if (status < 0)
1022 goto error;
1023 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001024
1025 state_fini(&state);
1026
Guido van Rossum4e173842001-12-07 04:25:10 +00001027 Py_DECREF(filter);
1028
Fredrik Lundhdac58492001-10-21 21:48:30 +00001029 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001030 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001031 if (!joiner) {
1032 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001033 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001034 }
1035 if (PyList_GET_SIZE(list) == 0) {
1036 Py_DECREF(list);
1037 item = joiner;
1038 }
1039 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001040 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001041 item = _PyBytes_Join(joiner, list);
1042 else
1043 item = PyUnicode_Join(joiner, list);
1044 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001045 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001046 if (!item)
1047 return NULL;
1048 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001049
1050 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001051 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001052
1053 return item;
1054
1055error:
1056 Py_DECREF(list);
1057 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001058 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001059 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001060
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001061}
1062
1063static PyObject*
1064pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
1065{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001066 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001067 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001068 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001069 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001070 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001071 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001072 return NULL;
1073
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001074 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001075}
1076
1077static PyObject*
1078pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
1079{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001080 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001081 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001082 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001083 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001084 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001085 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001086 return NULL;
1087
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001088 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001089}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001090
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001091static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001092pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001093{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001094#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001095 PatternObject* copy;
1096 int offset;
1097
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001098 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1099 if (!copy)
1100 return NULL;
1101
1102 offset = offsetof(PatternObject, groups);
1103
1104 Py_XINCREF(self->groupindex);
1105 Py_XINCREF(self->indexgroup);
1106 Py_XINCREF(self->pattern);
1107
1108 memcpy((char*) copy + offset, (char*) self + offset,
1109 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001110 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001111
1112 return (PyObject*) copy;
1113#else
1114 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1115 return NULL;
1116#endif
1117}
1118
1119static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001120pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001121{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001122#ifdef USE_BUILTIN_COPY
1123 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001124
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001125 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001126 if (!copy)
1127 return NULL;
1128
1129 if (!deepcopy(&copy->groupindex, memo) ||
1130 !deepcopy(&copy->indexgroup, memo) ||
1131 !deepcopy(&copy->pattern, memo)) {
1132 Py_DECREF(copy);
1133 return NULL;
1134 }
1135
1136#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001137 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1138 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001139#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001140}
1141
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001142static PyObject *
1143pattern_repr(PatternObject *obj)
1144{
1145 static const struct {
1146 const char *name;
1147 int value;
1148 } flag_names[] = {
1149 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1150 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1151 {"re.LOCALE", SRE_FLAG_LOCALE},
1152 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1153 {"re.DOTALL", SRE_FLAG_DOTALL},
1154 {"re.UNICODE", SRE_FLAG_UNICODE},
1155 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1156 {"re.DEBUG", SRE_FLAG_DEBUG},
1157 {"re.ASCII", SRE_FLAG_ASCII},
1158 };
1159 PyObject *result = NULL;
1160 PyObject *flag_items;
1161 int i;
1162 int flags = obj->flags;
1163
1164 /* Omit re.UNICODE for valid string patterns. */
1165 if (obj->isbytes == 0 &&
1166 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1167 SRE_FLAG_UNICODE)
1168 flags &= ~SRE_FLAG_UNICODE;
1169
1170 flag_items = PyList_New(0);
1171 if (!flag_items)
1172 return NULL;
1173
1174 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1175 if (flags & flag_names[i].value) {
1176 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1177 if (!item)
1178 goto done;
1179
1180 if (PyList_Append(flag_items, item) < 0) {
1181 Py_DECREF(item);
1182 goto done;
1183 }
1184 Py_DECREF(item);
1185 flags &= ~flag_names[i].value;
1186 }
1187 }
1188 if (flags) {
1189 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1190 if (!item)
1191 goto done;
1192
1193 if (PyList_Append(flag_items, item) < 0) {
1194 Py_DECREF(item);
1195 goto done;
1196 }
1197 Py_DECREF(item);
1198 }
1199
1200 if (PyList_Size(flag_items) > 0) {
1201 PyObject *flags_result;
1202 PyObject *sep = PyUnicode_FromString("|");
1203 if (!sep)
1204 goto done;
1205 flags_result = PyUnicode_Join(sep, flag_items);
1206 Py_DECREF(sep);
1207 if (!flags_result)
1208 goto done;
1209 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1210 obj->pattern, flags_result);
1211 Py_DECREF(flags_result);
1212 }
1213 else {
1214 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1215 }
1216
1217done:
1218 Py_DECREF(flag_items);
1219 return result;
1220}
1221
/* Docstrings for the pattern object's methods; each one is referenced
   from the matching entry of the pattern_methods table. */

/* Docstring for the "match" method. */
Raymond Hettinger94478742004-09-24 04:31:19 +00001222PyDoc_STRVAR(pattern_match_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001223"match(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001224 Matches zero or more characters at the beginning of the string");
1225
/* Docstring for the "search" method. */
1226PyDoc_STRVAR(pattern_search_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001227"search(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001228 Scan through string looking for a match, and return a corresponding\n\
Andrew Svetlov0b64c142012-12-25 18:48:54 +02001229 match object instance. Return None if no position in the string matches.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001230
/* Docstring for the "split" method. */
1231PyDoc_STRVAR(pattern_split_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001232"split(string[, maxsplit = 0]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001233 Split string by the occurrences of pattern.");
1234
/* Docstring for the "findall" method. */
1235PyDoc_STRVAR(pattern_findall_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001236"findall(string[, pos[, endpos]]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001237 Return a list of all non-overlapping matches of pattern in string.");
1238
/* Docstring for the "finditer" method. */
1239PyDoc_STRVAR(pattern_finditer_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001240"finditer(string[, pos[, endpos]]) -> iterator.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001241 Return an iterator over all non-overlapping matches for the \n\
1242 RE pattern in string. For each match, the iterator returns a\n\
1243 match object.");
1244
/* Docstring for the "sub" method. */
1245PyDoc_STRVAR(pattern_sub_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001246"sub(repl, string[, count = 0]) -> newstring.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001247 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00001248 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001249
/* Docstring for the "subn" method. */
1250PyDoc_STRVAR(pattern_subn_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001251"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001252 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
1253 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00001254 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001255
/* Docstring of the pattern type itself (used as tp_doc of Pattern_Type). */
1256PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1257
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001258static PyMethodDef pattern_methods[] = {
Tim Peters3d563502006-01-21 02:47:53 +00001259 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001260 pattern_match_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001261 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001262 pattern_search_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001263 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001264 pattern_sub_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001265 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001266 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001267 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001268 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001269 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001270 pattern_findall_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001271 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001272 pattern_finditer_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001273 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001274 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
1275 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001276 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001277};
1278
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00001279#define PAT_OFF(x) offsetof(PatternObject, x)
1280static PyMemberDef pattern_members[] = {
1281 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
1282 {"flags", T_INT, PAT_OFF(flags), READONLY},
1283 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
1284 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
1285 {NULL} /* Sentinel */
1286};
Guido van Rossumb700df92000-03-31 14:59:30 +00001287
Neal Norwitz57c179c2006-03-22 07:18:02 +00001288static PyTypeObject Pattern_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001289 PyVarObject_HEAD_INIT(NULL, 0)
1290 "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001291 sizeof(PatternObject), sizeof(SRE_CODE),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001292 (destructor)pattern_dealloc, /* tp_dealloc */
1293 0, /* tp_print */
1294 0, /* tp_getattr */
1295 0, /* tp_setattr */
1296 0, /* tp_reserved */
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001297 (reprfunc)pattern_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001298 0, /* tp_as_number */
1299 0, /* tp_as_sequence */
1300 0, /* tp_as_mapping */
1301 0, /* tp_hash */
1302 0, /* tp_call */
1303 0, /* tp_str */
1304 0, /* tp_getattro */
1305 0, /* tp_setattro */
1306 0, /* tp_as_buffer */
1307 Py_TPFLAGS_DEFAULT, /* tp_flags */
1308 pattern_doc, /* tp_doc */
1309 0, /* tp_traverse */
1310 0, /* tp_clear */
1311 0, /* tp_richcompare */
1312 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
1313 0, /* tp_iter */
1314 0, /* tp_iternext */
1315 pattern_methods, /* tp_methods */
1316 pattern_members, /* tp_members */
Guido van Rossumb700df92000-03-31 14:59:30 +00001317};
1318
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001319static int _validate(PatternObject *self); /* Forward */
1320
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001321static PyObject *
1322_compile(PyObject* self_, PyObject* args)
1323{
1324 /* "compile" pattern descriptor to pattern object */
1325
1326 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001327 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001328
1329 PyObject* pattern;
1330 int flags = 0;
1331 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001332 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001333 PyObject* groupindex = NULL;
1334 PyObject* indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001335
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001336 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001337 &PyList_Type, &code, &groups,
1338 &groupindex, &indexgroup))
1339 return NULL;
1340
1341 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001342 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001343 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1344 if (!self)
1345 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001346 self->weakreflist = NULL;
1347 self->pattern = NULL;
1348 self->groupindex = NULL;
1349 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001350
1351 self->codesize = n;
1352
1353 for (i = 0; i < n; i++) {
1354 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001355 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001356 self->code[i] = (SRE_CODE) value;
1357 if ((unsigned long) self->code[i] != value) {
1358 PyErr_SetString(PyExc_OverflowError,
1359 "regular expression code size limit exceeded");
1360 break;
1361 }
1362 }
1363
1364 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001365 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001366 return NULL;
1367 }
1368
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001369 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001370 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001371 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001372 else {
1373 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001374 int charsize;
1375 Py_buffer view;
1376 view.buf = NULL;
1377 if (!getstring(pattern, &p_length, &self->isbytes,
1378 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001379 Py_DECREF(self);
1380 return NULL;
1381 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001382 if (view.buf)
1383 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001384 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001385
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001386 Py_INCREF(pattern);
1387 self->pattern = pattern;
1388
1389 self->flags = flags;
1390
1391 self->groups = groups;
1392
1393 Py_XINCREF(groupindex);
1394 self->groupindex = groupindex;
1395
1396 Py_XINCREF(indexgroup);
1397 self->indexgroup = indexgroup;
1398
1399 self->weakreflist = NULL;
1400
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001401 if (!_validate(self)) {
1402 Py_DECREF(self);
1403 return NULL;
1404 }
1405
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001406 return (PyObject*) self;
1407}
1408
Guido van Rossumb700df92000-03-31 14:59:30 +00001409/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001410/* Code validation */
1411
1412/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
1414 for conformance with the code patterns generated there.
1415
1416 The nice thing about the generated code is that it is position-independent:
1417 all jumps are relative jumps forward. Also, jumps don't cross each other:
1418 the target of a later jump is always earlier than the target of an earlier
1419 jump. IOW, this is okay:
1420
1421 J---------J-------T--------T
1422 \ \_____/ /
1423 \______________________/
1424
1425 but this is not:
1426
1427 J---------J-------T--------T
1428 \_________\_____/ /
1429 \____________/
1430
Serhiy Storchakaefa5a392013-10-27 08:04:58 +02001431 It also helps that SRE_CODE is always an unsigned type.
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001432*/
1433
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf argument list, hence the double parentheses at
   every use. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the function the macro is expanded in */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the expanding function: they read
   and advance 'code', compare it against 'end', and store into 'op',
   'arg' or 'skip' respectively.  Each one FAILs when the code array is
   already exhausted. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Like GET_ARG, but additionally FAILs unless skip-adj fits in what is
   left of the array, measured from the skip word itself.  The macro
   parameter is parenthesized so that an expression argument cannot
   change the meaning of the subtraction. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001474
1475static int
1476_validate_charset(SRE_CODE *code, SRE_CODE *end)
1477{
1478 /* Some variables are manipulated by the macros above */
1479 SRE_CODE op;
1480 SRE_CODE arg;
1481 SRE_CODE offset;
1482 int i;
1483
1484 while (code < end) {
1485 GET_OP;
1486 switch (op) {
1487
1488 case SRE_OP_NEGATE:
1489 break;
1490
1491 case SRE_OP_LITERAL:
1492 GET_ARG;
1493 break;
1494
1495 case SRE_OP_RANGE:
1496 GET_ARG;
1497 GET_ARG;
1498 break;
1499
1500 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001501 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001502 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001503 FAIL;
1504 code += offset;
1505 break;
1506
1507 case SRE_OP_BIGCHARSET:
1508 GET_ARG; /* Number of blocks */
1509 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001510 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001511 FAIL;
1512 /* Make sure that each byte points to a valid block */
1513 for (i = 0; i < 256; i++) {
1514 if (((unsigned char *)code)[i] >= arg)
1515 FAIL;
1516 }
1517 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001518 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001519 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001520 FAIL;
1521 code += offset;
1522 break;
1523
1524 case SRE_OP_CATEGORY:
1525 GET_ARG;
1526 switch (arg) {
1527 case SRE_CATEGORY_DIGIT:
1528 case SRE_CATEGORY_NOT_DIGIT:
1529 case SRE_CATEGORY_SPACE:
1530 case SRE_CATEGORY_NOT_SPACE:
1531 case SRE_CATEGORY_WORD:
1532 case SRE_CATEGORY_NOT_WORD:
1533 case SRE_CATEGORY_LINEBREAK:
1534 case SRE_CATEGORY_NOT_LINEBREAK:
1535 case SRE_CATEGORY_LOC_WORD:
1536 case SRE_CATEGORY_LOC_NOT_WORD:
1537 case SRE_CATEGORY_UNI_DIGIT:
1538 case SRE_CATEGORY_UNI_NOT_DIGIT:
1539 case SRE_CATEGORY_UNI_SPACE:
1540 case SRE_CATEGORY_UNI_NOT_SPACE:
1541 case SRE_CATEGORY_UNI_WORD:
1542 case SRE_CATEGORY_UNI_NOT_WORD:
1543 case SRE_CATEGORY_UNI_LINEBREAK:
1544 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1545 break;
1546 default:
1547 FAIL;
1548 }
1549 break;
1550
1551 default:
1552 FAIL;
1553
1554 }
1555 }
1556
1557 return 1;
1558}
1559
1560static int
1561_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1562{
1563 /* Some variables are manipulated by the macros above */
1564 SRE_CODE op;
1565 SRE_CODE arg;
1566 SRE_CODE skip;
1567
1568 VTRACE(("code=%p, end=%p\n", code, end));
1569
1570 if (code > end)
1571 FAIL;
1572
1573 while (code < end) {
1574 GET_OP;
1575 switch (op) {
1576
1577 case SRE_OP_MARK:
1578 /* We don't check whether marks are properly nested; the
1579 sre_match() code is robust even if they don't, and the worst
1580 you can get is nonsensical match results. */
1581 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001582 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001583 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1584 FAIL;
1585 }
1586 break;
1587
1588 case SRE_OP_LITERAL:
1589 case SRE_OP_NOT_LITERAL:
1590 case SRE_OP_LITERAL_IGNORE:
1591 case SRE_OP_NOT_LITERAL_IGNORE:
1592 GET_ARG;
1593 /* The arg is just a character, nothing to check */
1594 break;
1595
1596 case SRE_OP_SUCCESS:
1597 case SRE_OP_FAILURE:
1598 /* Nothing to check; these normally end the matching process */
1599 break;
1600
1601 case SRE_OP_AT:
1602 GET_ARG;
1603 switch (arg) {
1604 case SRE_AT_BEGINNING:
1605 case SRE_AT_BEGINNING_STRING:
1606 case SRE_AT_BEGINNING_LINE:
1607 case SRE_AT_END:
1608 case SRE_AT_END_LINE:
1609 case SRE_AT_END_STRING:
1610 case SRE_AT_BOUNDARY:
1611 case SRE_AT_NON_BOUNDARY:
1612 case SRE_AT_LOC_BOUNDARY:
1613 case SRE_AT_LOC_NON_BOUNDARY:
1614 case SRE_AT_UNI_BOUNDARY:
1615 case SRE_AT_UNI_NON_BOUNDARY:
1616 break;
1617 default:
1618 FAIL;
1619 }
1620 break;
1621
1622 case SRE_OP_ANY:
1623 case SRE_OP_ANY_ALL:
1624 /* These have no operands */
1625 break;
1626
1627 case SRE_OP_IN:
1628 case SRE_OP_IN_IGNORE:
1629 GET_SKIP;
1630 /* Stop 1 before the end; we check the FAILURE below */
1631 if (!_validate_charset(code, code+skip-2))
1632 FAIL;
1633 if (code[skip-2] != SRE_OP_FAILURE)
1634 FAIL;
1635 code += skip-1;
1636 break;
1637
1638 case SRE_OP_INFO:
1639 {
1640 /* A minimal info field is
1641 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1642 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1643 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001644 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001645 SRE_CODE *newcode;
1646 GET_SKIP;
1647 newcode = code+skip-1;
1648 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001649 GET_ARG;
1650 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001651 /* Check that only valid flags are present */
1652 if ((flags & ~(SRE_INFO_PREFIX |
1653 SRE_INFO_LITERAL |
1654 SRE_INFO_CHARSET)) != 0)
1655 FAIL;
1656 /* PREFIX and CHARSET are mutually exclusive */
1657 if ((flags & SRE_INFO_PREFIX) &&
1658 (flags & SRE_INFO_CHARSET))
1659 FAIL;
1660 /* LITERAL implies PREFIX */
1661 if ((flags & SRE_INFO_LITERAL) &&
1662 !(flags & SRE_INFO_PREFIX))
1663 FAIL;
1664 /* Validate the prefix */
1665 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001666 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001667 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001668 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001669 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001670 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001671 FAIL;
1672 code += prefix_len;
1673 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001674 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001675 FAIL;
1676 /* Each overlap value should be < prefix_len */
1677 for (i = 0; i < prefix_len; i++) {
1678 if (code[i] >= prefix_len)
1679 FAIL;
1680 }
1681 code += prefix_len;
1682 }
1683 /* Validate the charset */
1684 if (flags & SRE_INFO_CHARSET) {
1685 if (!_validate_charset(code, newcode-1))
1686 FAIL;
1687 if (newcode[-1] != SRE_OP_FAILURE)
1688 FAIL;
1689 code = newcode;
1690 }
1691 else if (code != newcode) {
1692 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1693 FAIL;
1694 }
1695 }
1696 break;
1697
1698 case SRE_OP_BRANCH:
1699 {
1700 SRE_CODE *target = NULL;
1701 for (;;) {
1702 GET_SKIP;
1703 if (skip == 0)
1704 break;
1705 /* Stop 2 before the end; we check the JUMP below */
1706 if (!_validate_inner(code, code+skip-3, groups))
1707 FAIL;
1708 code += skip-3;
1709 /* Check that it ends with a JUMP, and that each JUMP
1710 has the same target */
1711 GET_OP;
1712 if (op != SRE_OP_JUMP)
1713 FAIL;
1714 GET_SKIP;
1715 if (target == NULL)
1716 target = code+skip-1;
1717 else if (code+skip-1 != target)
1718 FAIL;
1719 }
1720 }
1721 break;
1722
1723 case SRE_OP_REPEAT_ONE:
1724 case SRE_OP_MIN_REPEAT_ONE:
1725 {
1726 SRE_CODE min, max;
1727 GET_SKIP;
1728 GET_ARG; min = arg;
1729 GET_ARG; max = arg;
1730 if (min > max)
1731 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001732 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001733 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001734 if (!_validate_inner(code, code+skip-4, groups))
1735 FAIL;
1736 code += skip-4;
1737 GET_OP;
1738 if (op != SRE_OP_SUCCESS)
1739 FAIL;
1740 }
1741 break;
1742
1743 case SRE_OP_REPEAT:
1744 {
1745 SRE_CODE min, max;
1746 GET_SKIP;
1747 GET_ARG; min = arg;
1748 GET_ARG; max = arg;
1749 if (min > max)
1750 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001751 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001752 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001753 if (!_validate_inner(code, code+skip-3, groups))
1754 FAIL;
1755 code += skip-3;
1756 GET_OP;
1757 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1758 FAIL;
1759 }
1760 break;
1761
1762 case SRE_OP_GROUPREF:
1763 case SRE_OP_GROUPREF_IGNORE:
1764 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001765 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001766 FAIL;
1767 break;
1768
1769 case SRE_OP_GROUPREF_EXISTS:
1770 /* The regex syntax for this is: '(?(group)then|else)', where
1771 'group' is either an integer group number or a group name,
1772 'then' and 'else' are sub-regexes, and 'else' is optional. */
1773 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001774 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001775 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001776 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001777 code--; /* The skip is relative to the first arg! */
1778 /* There are two possibilities here: if there is both a 'then'
1779 part and an 'else' part, the generated code looks like:
1780
1781 GROUPREF_EXISTS
1782 <group>
1783 <skipyes>
1784 ...then part...
1785 JUMP
1786 <skipno>
1787 (<skipyes> jumps here)
1788 ...else part...
1789 (<skipno> jumps here)
1790
1791 If there is only a 'then' part, it looks like:
1792
1793 GROUPREF_EXISTS
1794 <group>
1795 <skip>
1796 ...then part...
1797 (<skip> jumps here)
1798
1799 There is no direct way to decide which it is, and we don't want
1800 to allow arbitrary jumps anywhere in the code; so we just look
1801 for a JUMP opcode preceding our skip target.
1802 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001803 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001804 code[skip-3] == SRE_OP_JUMP)
1805 {
1806 VTRACE(("both then and else parts present\n"));
1807 if (!_validate_inner(code+1, code+skip-3, groups))
1808 FAIL;
1809 code += skip-2; /* Position after JUMP, at <skipno> */
1810 GET_SKIP;
1811 if (!_validate_inner(code, code+skip-1, groups))
1812 FAIL;
1813 code += skip-1;
1814 }
1815 else {
1816 VTRACE(("only a then part present\n"));
1817 if (!_validate_inner(code+1, code+skip-1, groups))
1818 FAIL;
1819 code += skip-1;
1820 }
1821 break;
1822
1823 case SRE_OP_ASSERT:
1824 case SRE_OP_ASSERT_NOT:
1825 GET_SKIP;
1826 GET_ARG; /* 0 for lookahead, width for lookbehind */
1827 code--; /* Back up over arg to simplify math below */
1828 if (arg & 0x80000000)
1829 FAIL; /* Width too large */
1830 /* Stop 1 before the end; we check the SUCCESS below */
1831 if (!_validate_inner(code+1, code+skip-2, groups))
1832 FAIL;
1833 code += skip-2;
1834 GET_OP;
1835 if (op != SRE_OP_SUCCESS)
1836 FAIL;
1837 break;
1838
1839 default:
1840 FAIL;
1841
1842 }
1843 }
1844
1845 VTRACE(("okay\n"));
1846 return 1;
1847}
1848
1849static int
1850_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1851{
1852 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
1853 FAIL;
1854 if (groups == 0) /* fix for simplejson */
1855 groups = 100; /* 100 groups should always be safe */
1856 return _validate_inner(code, end-1, groups);
1857}
1858
1859static int
1860_validate(PatternObject *self)
1861{
1862 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1863 {
1864 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1865 return 0;
1866 }
1867 else
1868 VTRACE(("Success!\n"));
1869 return 1;
1870}
1871
1872/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001873/* match methods */
1874
1875static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001876match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001877{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001878 Py_XDECREF(self->regs);
1879 Py_XDECREF(self->string);
1880 Py_DECREF(self->pattern);
1881 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001882}
1883
1884static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001885match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001886{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001887 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001888 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001889 Py_buffer view;
1890 PyObject *result;
1891 void* ptr;
1892
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001893 if (index < 0 || index >= self->groups) {
1894 /* raise IndexError if we were given a bad group number */
1895 PyErr_SetString(
1896 PyExc_IndexError,
1897 "no such group"
1898 );
1899 return NULL;
1900 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001901
Fredrik Lundh6f013982000-07-03 18:44:21 +00001902 index *= 2;
1903
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001904 if (self->string == Py_None || self->mark[index] < 0) {
1905 /* return default value if the string or group is undefined */
1906 Py_INCREF(def);
1907 return def;
1908 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001909
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001910 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001911 if (ptr == NULL)
1912 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001913 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001914 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001915 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001916 PyBuffer_Release(&view);
1917 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001918}
1919
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001920static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001921match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001922{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001923 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001924
Guido van Rossumddefaf32007-01-14 03:31:43 +00001925 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001926 /* Default value */
1927 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001928
Christian Heimes217cfd12007-12-02 14:31:20 +00001929 if (PyLong_Check(index))
1930 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001931
Fredrik Lundh6f013982000-07-03 18:44:21 +00001932 i = -1;
1933
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001934 if (self->pattern->groupindex) {
1935 index = PyObject_GetItem(self->pattern->groupindex, index);
1936 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00001937 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00001938 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001939 Py_DECREF(index);
1940 } else
1941 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001942 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001943
1944 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001945}
1946
1947static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001948match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001949{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001950 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001951}
1952
1953static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001954match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001955{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001956 /* delegate to Python code */
1957 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001958 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001959 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001960 );
1961}
1962
1963static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001964match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001965{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001966 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001967 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001968
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001969 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001970
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001971 switch (size) {
1972 case 0:
1973 result = match_getslice(self, Py_False, Py_None);
1974 break;
1975 case 1:
1976 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1977 break;
1978 default:
1979 /* fetch multiple items */
1980 result = PyTuple_New(size);
1981 if (!result)
1982 return NULL;
1983 for (i = 0; i < size; i++) {
1984 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001985 self, PyTuple_GET_ITEM(args, i), Py_None
1986 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001987 if (!item) {
1988 Py_DECREF(result);
1989 return NULL;
1990 }
1991 PyTuple_SET_ITEM(result, i, item);
1992 }
1993 break;
1994 }
1995 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001996}
1997
1998static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001999match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002000{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002001 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002002 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002003
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002004 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002005 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00002006 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002007 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002008
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002009 result = PyTuple_New(self->groups-1);
2010 if (!result)
2011 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002012
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002013 for (index = 1; index < self->groups; index++) {
2014 PyObject* item;
2015 item = match_getslice_by_index(self, index, def);
2016 if (!item) {
2017 Py_DECREF(result);
2018 return NULL;
2019 }
2020 PyTuple_SET_ITEM(result, index-1, item);
2021 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002022
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002023 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002024}
2025
2026static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002027match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002028{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002029 PyObject* result;
2030 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002031 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002032
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002033 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002034 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002035 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002036 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002037
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002038 result = PyDict_New();
2039 if (!result || !self->pattern->groupindex)
2040 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002041
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002042 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002043 if (!keys)
2044 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002045
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002046 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002047 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002048 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002049 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002050 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002051 if (!key)
2052 goto failed;
2053 value = match_getslice(self, key, def);
2054 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002055 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002056 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002057 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002058 status = PyDict_SetItem(result, key, value);
2059 Py_DECREF(value);
2060 if (status < 0)
2061 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002062 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002063
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002064 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002065
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002066 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002067
2068failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002069 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002070 Py_DECREF(result);
2071 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002072}
2073
2074static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002075match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002076{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002077 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002078
Guido van Rossumddefaf32007-01-14 03:31:43 +00002079 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002080 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002081 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002082
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002083 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002084
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002085 if (index < 0 || index >= self->groups) {
2086 PyErr_SetString(
2087 PyExc_IndexError,
2088 "no such group"
2089 );
2090 return NULL;
2091 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002092
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002093 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002094 return PyLong_FromSsize_t(self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002095}
2096
2097static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002098match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002099{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002100 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002101
Guido van Rossumddefaf32007-01-14 03:31:43 +00002102 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002103 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002104 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002105
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002106 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002107
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002108 if (index < 0 || index >= self->groups) {
2109 PyErr_SetString(
2110 PyExc_IndexError,
2111 "no such group"
2112 );
2113 return NULL;
2114 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002115
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002116 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002117 return PyLong_FromSsize_t(self->mark[index*2+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002118}
2119
2120LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002121_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002122{
2123 PyObject* pair;
2124 PyObject* item;
2125
2126 pair = PyTuple_New(2);
2127 if (!pair)
2128 return NULL;
2129
Christian Heimes217cfd12007-12-02 14:31:20 +00002130 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002131 if (!item)
2132 goto error;
2133 PyTuple_SET_ITEM(pair, 0, item);
2134
Christian Heimes217cfd12007-12-02 14:31:20 +00002135 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002136 if (!item)
2137 goto error;
2138 PyTuple_SET_ITEM(pair, 1, item);
2139
2140 return pair;
2141
2142 error:
2143 Py_DECREF(pair);
2144 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002145}
2146
2147static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002148match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002149{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002150 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002151
Guido van Rossumddefaf32007-01-14 03:31:43 +00002152 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002153 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002154 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002155
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002156 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002157
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002158 if (index < 0 || index >= self->groups) {
2159 PyErr_SetString(
2160 PyExc_IndexError,
2161 "no such group"
2162 );
2163 return NULL;
2164 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002165
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002166 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002167 return _pair(self->mark[index*2], self->mark[index*2+1]);
2168}
2169
2170static PyObject*
2171match_regs(MatchObject* self)
2172{
2173 PyObject* regs;
2174 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002175 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002176
2177 regs = PyTuple_New(self->groups);
2178 if (!regs)
2179 return NULL;
2180
2181 for (index = 0; index < self->groups; index++) {
2182 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2183 if (!item) {
2184 Py_DECREF(regs);
2185 return NULL;
2186 }
2187 PyTuple_SET_ITEM(regs, index, item);
2188 }
2189
2190 Py_INCREF(regs);
2191 self->regs = regs;
2192
2193 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002194}
2195
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002196static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002197match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002198{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002199#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002200 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002201 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002202
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002203 slots = 2 * (self->pattern->groups+1);
2204
2205 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2206 if (!copy)
2207 return NULL;
2208
2209 /* this value a constant, but any compiler should be able to
2210 figure that out all by itself */
2211 offset = offsetof(MatchObject, string);
2212
2213 Py_XINCREF(self->pattern);
2214 Py_XINCREF(self->string);
2215 Py_XINCREF(self->regs);
2216
2217 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002218 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002219
2220 return (PyObject*) copy;
2221#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002222 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002223 return NULL;
2224#endif
2225}
2226
2227static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002228match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002229{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002230#ifdef USE_BUILTIN_COPY
2231 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002232
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002233 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002234 if (!copy)
2235 return NULL;
2236
2237 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2238 !deepcopy(&copy->string, memo) ||
2239 !deepcopy(&copy->regs, memo)) {
2240 Py_DECREF(copy);
2241 return NULL;
2242 }
2243
2244#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002245 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2246 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002247#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002248}
2249
/* Docstrings for the match type (match_doc, used as Match_Type's tp_doc)
   and for the entries of match_methods.  The continuation lines are part
   of the string literals, so their leading whitespace is runtime text. */
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002250PyDoc_STRVAR(match_doc,
2251"The result of re.match() and re.search().\n\
2252Match objects always have a boolean value of True.");
2253
2254PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002255"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002256 Return subgroup(s) of the match by indices or names.\n\
2257 For 0 returns the entire match.");
2258
2259PyDoc_STRVAR(match_start_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002260"start([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002261 Return index of the start of the substring matched by group.");
2262
2263PyDoc_STRVAR(match_end_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002264"end([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002265 Return index of the end of the substring matched by group.");
2266
2267PyDoc_STRVAR(match_span_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002268"span([group]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002269 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
2270
2271PyDoc_STRVAR(match_groups_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002272"groups([default=None]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002273 Return a tuple containing all the subgroups of the match, from 1.\n\
2274 The default argument is used for groups\n\
2275 that did not participate in the match");
2276
2277PyDoc_STRVAR(match_groupdict_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002278"groupdict([default=None]) -> dict.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002279 Return a dictionary containing all the named subgroups of the match,\n\
2280 keyed by the subgroup name. The default argument is used for groups\n\
2281 that did not participate in the match");
2282
2283PyDoc_STRVAR(match_expand_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002284"expand(template) -> str.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002285 Return the string obtained by doing backslash substitution\n\
2286 on the string template, as done by the sub() method.");
2287
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002288static PyMethodDef match_methods[] = {
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002289 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2290 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
2291 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
2292 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
2293 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
2294 match_groups_doc},
2295 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
2296 match_groupdict_doc},
2297 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002298 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
2299 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002300 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002301};
2302
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002303static PyObject *
2304match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002305{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002306 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002307 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002308 Py_INCREF(Py_None);
2309 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002310}
2311
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002312static PyObject *
2313match_lastgroup_get(MatchObject *self)
2314{
2315 if (self->pattern->indexgroup && self->lastindex >= 0) {
2316 PyObject* result = PySequence_GetItem(
2317 self->pattern->indexgroup, self->lastindex
2318 );
2319 if (result)
2320 return result;
2321 PyErr_Clear();
2322 }
2323 Py_INCREF(Py_None);
2324 return Py_None;
2325}
2326
2327static PyObject *
2328match_regs_get(MatchObject *self)
2329{
2330 if (self->regs) {
2331 Py_INCREF(self->regs);
2332 return self->regs;
2333 } else
2334 return match_regs(self);
2335}
2336
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002337static PyObject *
2338match_repr(MatchObject *self)
2339{
2340 PyObject *result;
2341 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2342 if (group0 == NULL)
2343 return NULL;
2344 result = PyUnicode_FromFormat(
2345 "<%s object; span=(%d, %d), match=%.50R>",
2346 Py_TYPE(self)->tp_name,
2347 self->mark[0], self->mark[1], group0);
2348 Py_DECREF(group0);
2349 return result;
2350}
2351
2352
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002353static PyGetSetDef match_getset[] = {
2354 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2355 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2356 {"regs", (getter)match_regs_get, (setter)NULL},
2357 {NULL}
2358};
2359
2360#define MATCH_OFF(x) offsetof(MatchObject, x)
2361static PyMemberDef match_members[] = {
2362 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2363 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2364 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2365 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2366 {NULL}
2367};
2368
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2371
Neal Norwitz57c179c2006-03-22 07:18:02 +00002372static PyTypeObject Match_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002373 PyVarObject_HEAD_INIT(NULL,0)
2374 "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002375 sizeof(MatchObject), sizeof(Py_ssize_t),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002376 (destructor)match_dealloc, /* tp_dealloc */
2377 0, /* tp_print */
2378 0, /* tp_getattr */
2379 0, /* tp_setattr */
2380 0, /* tp_reserved */
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002381 (reprfunc)match_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002382 0, /* tp_as_number */
2383 0, /* tp_as_sequence */
2384 0, /* tp_as_mapping */
2385 0, /* tp_hash */
2386 0, /* tp_call */
2387 0, /* tp_str */
2388 0, /* tp_getattro */
2389 0, /* tp_setattro */
2390 0, /* tp_as_buffer */
2391 Py_TPFLAGS_DEFAULT, /* tp_flags */
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002392 match_doc, /* tp_doc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002393 0, /* tp_traverse */
2394 0, /* tp_clear */
2395 0, /* tp_richcompare */
2396 0, /* tp_weaklistoffset */
2397 0, /* tp_iter */
2398 0, /* tp_iternext */
2399 match_methods, /* tp_methods */
2400 match_members, /* tp_members */
2401 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00002402};
2403
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002404static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002405pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002406{
2407 /* create match object (from state object) */
2408
2409 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002410 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002411 char* base;
2412 int n;
2413
2414 if (status > 0) {
2415
2416 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002417 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002418 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2419 2*(pattern->groups+1));
2420 if (!match)
2421 return NULL;
2422
2423 Py_INCREF(pattern);
2424 match->pattern = pattern;
2425
2426 Py_INCREF(state->string);
2427 match->string = state->string;
2428
2429 match->regs = NULL;
2430 match->groups = pattern->groups+1;
2431
2432 /* fill in group slices */
2433
2434 base = (char*) state->beginning;
2435 n = state->charsize;
2436
2437 match->mark[0] = ((char*) state->start - base) / n;
2438 match->mark[1] = ((char*) state->ptr - base) / n;
2439
2440 for (i = j = 0; i < pattern->groups; i++, j+=2)
2441 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2442 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2443 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2444 } else
2445 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2446
2447 match->pos = state->pos;
2448 match->endpos = state->endpos;
2449
2450 match->lastindex = state->lastindex;
2451
2452 return (PyObject*) match;
2453
2454 } else if (status == 0) {
2455
2456 /* no match */
2457 Py_INCREF(Py_None);
2458 return Py_None;
2459
2460 }
2461
2462 /* internal error */
2463 pattern_error(status);
2464 return NULL;
2465}
2466
2467
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002468/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002469/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002470
2471static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002472scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002473{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002474 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002475 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002476 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002477}
2478
2479static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002480scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002481{
2482 SRE_STATE* state = &self->state;
2483 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002484 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002485
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002486 state_reset(state);
2487
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002488 state->ptr = state->start;
2489
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002490 status = sre_match(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002491 if (PyErr_Occurred())
2492 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002493
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002494 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002495 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002496
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002497 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002498 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002499 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002500 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002501
2502 return match;
2503}
2504
2505
2506static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002507scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002508{
2509 SRE_STATE* state = &self->state;
2510 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002511 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002512
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002513 state_reset(state);
2514
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002515 state->ptr = state->start;
2516
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002517 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002518 if (PyErr_Occurred())
2519 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002520
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002521 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002522 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002523
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002524 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002525 state->start = (void*) ((char*) state->ptr + state->charsize);
2526 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002527 state->start = state->ptr;
2528
2529 return match;
2530}
2531
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002532static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002533 {"match", (PyCFunction) scanner_match, METH_NOARGS},
2534 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002535 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002536};
2537
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002538#define SCAN_OFF(x) offsetof(ScannerObject, x)
2539static PyMemberDef scanner_members[] = {
Ezio Melotti7c8c1ea2011-09-29 01:00:19 +03002540 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002541 {NULL} /* Sentinel */
2542};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002543
Neal Norwitz57c179c2006-03-22 07:18:02 +00002544static PyTypeObject Scanner_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002545 PyVarObject_HEAD_INIT(NULL, 0)
2546 "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002547 sizeof(ScannerObject), 0,
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002548 (destructor)scanner_dealloc,/* tp_dealloc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002549 0, /* tp_print */
2550 0, /* tp_getattr */
2551 0, /* tp_setattr */
2552 0, /* tp_reserved */
2553 0, /* tp_repr */
2554 0, /* tp_as_number */
2555 0, /* tp_as_sequence */
2556 0, /* tp_as_mapping */
2557 0, /* tp_hash */
2558 0, /* tp_call */
2559 0, /* tp_str */
2560 0, /* tp_getattro */
2561 0, /* tp_setattro */
2562 0, /* tp_as_buffer */
2563 Py_TPFLAGS_DEFAULT, /* tp_flags */
2564 0, /* tp_doc */
2565 0, /* tp_traverse */
2566 0, /* tp_clear */
2567 0, /* tp_richcompare */
2568 0, /* tp_weaklistoffset */
2569 0, /* tp_iter */
2570 0, /* tp_iternext */
2571 scanner_methods, /* tp_methods */
2572 scanner_members, /* tp_members */
2573 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002574};
2575
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002576static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002577pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kw)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002578{
2579 /* create search state object */
2580
2581 ScannerObject* self;
2582
2583 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002584 Py_ssize_t start = 0;
2585 Py_ssize_t end = PY_SSIZE_T_MAX;
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002586 static char* kwlist[] = { "source", "pos", "endpos", NULL };
2587 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:scanner", kwlist,
2588 &string, &start, &end))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002589 return NULL;
2590
2591 /* create scanner object */
2592 self = PyObject_NEW(ScannerObject, &Scanner_Type);
2593 if (!self)
2594 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002595 self->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002596
2597 string = state_init(&self->state, pattern, string, start, end);
2598 if (!string) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002599 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002600 return NULL;
2601 }
2602
2603 Py_INCREF(pattern);
2604 self->pattern = (PyObject*) pattern;
2605
2606 return (PyObject*) self;
2607}
2608
Guido van Rossumb700df92000-03-31 14:59:30 +00002609static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002610 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002611 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00002612 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002613 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002614};
2615
Martin v. Löwis1a214512008-06-11 05:26:20 +00002616static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002617 PyModuleDef_HEAD_INIT,
2618 "_" SRE_MODULE,
2619 NULL,
2620 -1,
2621 _functions,
2622 NULL,
2623 NULL,
2624 NULL,
2625 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002626};
2627
2628PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002629{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002630 PyObject* m;
2631 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002632 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002633
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002634 /* Patch object types */
2635 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2636 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002637 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002638
Martin v. Löwis1a214512008-06-11 05:26:20 +00002639 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002640 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002641 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002642 d = PyModule_GetDict(m);
2643
Christian Heimes217cfd12007-12-02 14:31:20 +00002644 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002645 if (x) {
2646 PyDict_SetItemString(d, "MAGIC", x);
2647 Py_DECREF(x);
2648 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002649
Christian Heimes217cfd12007-12-02 14:31:20 +00002650 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002651 if (x) {
2652 PyDict_SetItemString(d, "CODESIZE", x);
2653 Py_DECREF(x);
2654 }
2655
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002656 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2657 if (x) {
2658 PyDict_SetItemString(d, "MAXREPEAT", x);
2659 Py_DECREF(x);
2660 }
2661
Neal Norwitzfe537132007-08-26 03:55:15 +00002662 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002663 if (x) {
2664 PyDict_SetItemString(d, "copyright", x);
2665 Py_DECREF(x);
2666 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002667 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002668}
2669
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002670/* vim:ts=4:sw=4:et
2671*/