blob: 4dcaec1c31f56f2bc1ad0efc5871a655bc39b853 [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02007 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
Guido van Rossumb700df92000-03-31 14:59:30 +000026 *
Fredrik Lundh770617b2001-01-14 15:06:11 +000027 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
Guido van Rossumb700df92000-03-31 14:59:30 +000028 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000029 * This version of the SRE library can be redistributed under CNRI's
30 * Python 1.6 license. For any other use, please contact Secret Labs
31 * AB (info@pythonware.com).
32 *
Guido van Rossumb700df92000-03-31 14:59:30 +000033 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000034 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000035 * other compatibility work.
36 */
37
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000038static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000039 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
65/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000066#define USE_FAST_SEARCH
67
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000068/* enables copy/deepcopy handling (work in progress) */
69#undef USE_BUILTIN_COPY
70
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000071/* -------------------------------------------------------------------- */
72
Fredrik Lundh80946112000-06-29 18:03:25 +000073#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000074#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000075#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000076/* fastest possible local call under MSVC */
77#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000078#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000079#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000080#else
81#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000082#endif
83
84/* error codes */
85#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000086#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000087#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000088#define SRE_ERROR_MEMORY -9 /* out of memory */
Christian Heimes2380ac72008-01-09 00:17:24 +000089#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000090
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000091#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000092#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000093#else
94#define TRACE(v)
95#endif
96
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000097/* -------------------------------------------------------------------- */
98/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000099
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000100/* default character predicates (run sre_chars.py to regenerate tables) */
101
102#define SRE_DIGIT_MASK 1
103#define SRE_SPACE_MASK 2
104#define SRE_LINEBREAK_MASK 4
105#define SRE_ALNUM_MASK 8
106#define SRE_WORD_MASK 16
107
Fredrik Lundh21009b92001-09-18 18:47:09 +0000108/* FIXME: this assumes ASCII. create tables in init_sre() instead */
109
/* Class bits for each of the 128 ASCII code points.  Every entry is an OR
   of the SRE_*_MASK values defined above (e.g. 25 = DIGIT|ALNUM|WORD,
   24 = ALNUM|WORD, 16 = WORD, 6 = SPACE|LINEBREAK, 2 = SPACE).  The table
   is only ever read, so it is declared const. */
static const char sre_char_info[128] = {
    /*   0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  6,  2,  2,  2,  0,  0,
    /*  16 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  32 */  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,  0,  0,  0,  0,  0,  0,
    /*  64 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0, 16,
    /*  96 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0,  0
};

/* ASCII lower-case mapping: entries 65..90 ('A'..'Z') map to 97..122
   ('a'..'z'); every other entry maps to itself.  Read-only, hence const. */
static const char sre_char_lower[128] = {
    /*   0 */   0,   1,   2,   3,   4,   5,   6,   7,
                8,   9,  10,  11,  12,  13,  14,  15,
    /*  16 */  16,  17,  18,  19,  20,  21,  22,  23,
               24,  25,  26,  27,  28,  29,  30,  31,
    /*  32 */  32,  33,  34,  35,  36,  37,  38,  39,
               40,  41,  42,  43,  44,  45,  46,  47,
    /*  48 */  48,  49,  50,  51,  52,  53,  54,  55,
               56,  57,  58,  59,  60,  61,  62,  63,
    /*  64 */  64,  97,  98,  99, 100, 101, 102, 103,
              104, 105, 106, 107, 108, 109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119,
              120, 121, 122,  91,  92,  93,  94,  95,
    /*  96 */  96,  97,  98,  99, 100, 101, 102, 103,
              104, 105, 106, 107, 108, 109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119,
              120, 121, 122, 123, 124, 125, 126, 127
};
127
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000128#define SRE_IS_DIGIT(ch)\
129 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
130#define SRE_IS_SPACE(ch)\
131 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
132#define SRE_IS_LINEBREAK(ch)\
133 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
134#define SRE_IS_ALNUM(ch)\
135 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
136#define SRE_IS_WORD(ch)\
137 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000138
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000139static unsigned int sre_lower(unsigned int ch)
140{
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000141 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000142}
143
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000144/* locale-specific character predicates */
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000145/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
146 * warnings when c's type supports only numbers < N+1 */
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000147#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000148#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
149
/* Lower-case via the C library's tolower(): only values below 256 are
   handed to tolower(); larger code points are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower(ch);
}
154
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000155/* unicode-specific character predicates */
156
Victor Stinner0058b862011-09-29 03:27:47 +0200157#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
158#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
159#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
160#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
161#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000162
/* Lower-case a code point using Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
167
Guido van Rossumb700df92000-03-31 14:59:30 +0000168LOCAL(int)
169sre_category(SRE_CODE category, unsigned int ch)
170{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000171 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000172
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000173 case SRE_CATEGORY_DIGIT:
174 return SRE_IS_DIGIT(ch);
175 case SRE_CATEGORY_NOT_DIGIT:
176 return !SRE_IS_DIGIT(ch);
177 case SRE_CATEGORY_SPACE:
178 return SRE_IS_SPACE(ch);
179 case SRE_CATEGORY_NOT_SPACE:
180 return !SRE_IS_SPACE(ch);
181 case SRE_CATEGORY_WORD:
182 return SRE_IS_WORD(ch);
183 case SRE_CATEGORY_NOT_WORD:
184 return !SRE_IS_WORD(ch);
185 case SRE_CATEGORY_LINEBREAK:
186 return SRE_IS_LINEBREAK(ch);
187 case SRE_CATEGORY_NOT_LINEBREAK:
188 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000189
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000190 case SRE_CATEGORY_LOC_WORD:
191 return SRE_LOC_IS_WORD(ch);
192 case SRE_CATEGORY_LOC_NOT_WORD:
193 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000194
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 case SRE_CATEGORY_UNI_DIGIT:
196 return SRE_UNI_IS_DIGIT(ch);
197 case SRE_CATEGORY_UNI_NOT_DIGIT:
198 return !SRE_UNI_IS_DIGIT(ch);
199 case SRE_CATEGORY_UNI_SPACE:
200 return SRE_UNI_IS_SPACE(ch);
201 case SRE_CATEGORY_UNI_NOT_SPACE:
202 return !SRE_UNI_IS_SPACE(ch);
203 case SRE_CATEGORY_UNI_WORD:
204 return SRE_UNI_IS_WORD(ch);
205 case SRE_CATEGORY_UNI_NOT_WORD:
206 return !SRE_UNI_IS_WORD(ch);
207 case SRE_CATEGORY_UNI_LINEBREAK:
208 return SRE_UNI_IS_LINEBREAK(ch);
209 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
210 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000211 }
212 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000213}
214
215/* helpers */
216
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000217static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000218data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000219{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000220 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000222 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000223 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000224 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225}
226
227static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000228data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000229{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000230 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000231 minsize = state->data_stack_base+size;
232 cursize = state->data_stack_size;
233 if (cursize < minsize) {
234 void* stack;
235 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300236 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000237 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000238 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000239 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000240 return SRE_ERROR_MEMORY;
241 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000242 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000243 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000244 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000245 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000246}
247
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000248/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000249
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300250#define SRE_CHAR Py_UCS1
251#define SIZEOF_SRE_CHAR 1
252#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300253#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000254
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300255/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000256
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300257#define SRE_CHAR Py_UCS2
258#define SIZEOF_SRE_CHAR 2
259#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300260#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000261
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300262/* generate 32-bit unicode version */
263
264#define SRE_CHAR Py_UCS4
265#define SIZEOF_SRE_CHAR 4
266#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300267#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000268
269/* -------------------------------------------------------------------- */
270/* factories and destructors */
271
272/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100273static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600274static PyObject*pattern_scanner(PatternObject*, PyObject*, PyObject* kw);
Guido van Rossumb700df92000-03-31 14:59:30 +0000275
276static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000277sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +0000278{
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100279 return PyLong_FromSize_t(sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +0000280}
281
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000282static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +0000283sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000284{
285 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000286 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000287 return NULL;
288 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000289 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000290 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000291 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundhb389df32000-06-29 12:48:37 +0000292 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000293}
294
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000295LOCAL(void)
296state_reset(SRE_STATE* state)
297{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000298 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000299 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000300
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000301 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000302 state->lastindex = -1;
303
304 state->repeat = NULL;
305
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000306 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307}
308
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000309static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200310getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300311 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600312 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000313{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000314 /* given a python object, return a data pointer, a length (in
315 characters), and a character size. return NULL if the object
316 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000317
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000318 /* Unicode objects do not support the buffer API. So, get the data
319 directly instead. */
320 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200321 if (PyUnicode_READY(string) == -1)
322 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200323 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200324 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300325 *p_isbytes = 0;
326 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000327 }
328
Victor Stinner0058b862011-09-29 03:27:47 +0200329 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300330 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
331 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
332 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000333 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000334
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300335 *p_length = view->len;
336 *p_charsize = 1;
337 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000338
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300339 if (view->buf == NULL) {
340 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
341 PyBuffer_Release(view);
342 view->buf = NULL;
343 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000344 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300345 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000346}
347
348LOCAL(PyObject*)
349state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000350 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000351{
352 /* prepare state object */
353
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000354 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300355 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000356 void* ptr;
357
358 memset(state, 0, sizeof(SRE_STATE));
359
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000360 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000361 state->lastindex = -1;
362
Benjamin Petersone48944b2012-03-07 14:50:25 -0600363 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300364 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000365 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600366 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000367
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300368 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600369 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300370 "can't use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600371 goto err;
372 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300373 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600374 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300375 "can't use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600376 goto err;
377 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000378
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000379 /* adjust boundaries */
380 if (start < 0)
381 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000382 else if (start > length)
383 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000384
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000385 if (end < 0)
386 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000387 else if (end > length)
388 end = length;
389
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300390 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000391 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000395 state->start = (void*) ((char*) ptr + start * state->charsize);
396 state->end = (void*) ((char*) ptr + end * state->charsize);
397
398 Py_INCREF(string);
399 state->string = string;
400 state->pos = start;
401 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000402
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000403 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000404 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000405 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000406 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000407 else
Fredrik Lundhb389df32000-06-29 12:48:37 +0000408 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000409
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000410 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600411 err:
412 if (state->buffer.buf)
413 PyBuffer_Release(&state->buffer);
414 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000415}
416
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000417LOCAL(void)
418state_fini(SRE_STATE* state)
419{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600420 if (state->buffer.buf)
421 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000422 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000423 data_stack_dealloc(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000424}
425
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000426/* calculate offset from start of string */
427#define STATE_OFFSET(state, member)\
428 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
429
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000430LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300431getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300432 PyObject* string, Py_ssize_t start, Py_ssize_t end)
433{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300434 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300435 if (PyBytes_CheckExact(string) &&
436 start == 0 && end == PyBytes_GET_SIZE(string)) {
437 Py_INCREF(string);
438 return string;
439 }
440 return PyBytes_FromStringAndSize(
441 (const char *)ptr + start, end - start);
442 }
443 else {
444 return PyUnicode_Substring(string, start, end);
445 }
446}
447
448LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000449state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000450{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000451 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000452
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000453 index = (index - 1) * 2;
454
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000455 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000456 if (empty)
457 /* want empty string */
458 i = j = 0;
459 else {
460 Py_INCREF(Py_None);
461 return Py_None;
462 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000463 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000464 i = STATE_OFFSET(state, state->mark[index]);
465 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000466 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000467
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300468 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000469}
470
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000471static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100472pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000473{
474 switch (status) {
475 case SRE_ERROR_RECURSION_LIMIT:
476 PyErr_SetString(
477 PyExc_RuntimeError,
478 "maximum recursion limit exceeded"
479 );
480 break;
481 case SRE_ERROR_MEMORY:
482 PyErr_NoMemory();
483 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000484 case SRE_ERROR_INTERRUPTED:
485 /* An exception has already been raised, so let it fly */
486 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000487 default:
488 /* other error codes indicate compiler/engine bugs */
489 PyErr_SetString(
490 PyExc_RuntimeError,
491 "internal error in regular expression engine"
492 );
493 }
494}
495
Guido van Rossumb700df92000-03-31 14:59:30 +0000496static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000497pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000498{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000499 if (self->weakreflist != NULL)
500 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000501 Py_XDECREF(self->pattern);
502 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000503 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000504 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000505}
506
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300507LOCAL(Py_ssize_t)
508sre_match(SRE_STATE* state, SRE_CODE* pattern)
509{
510 if (state->charsize == 1)
511 return sre_ucs1_match(state, pattern);
512 if (state->charsize == 2)
513 return sre_ucs2_match(state, pattern);
514 assert(state->charsize == 4);
515 return sre_ucs4_match(state, pattern);
516}
517
518LOCAL(Py_ssize_t)
519sre_search(SRE_STATE* state, SRE_CODE* pattern)
520{
521 if (state->charsize == 1)
522 return sre_ucs1_search(state, pattern);
523 if (state->charsize == 2)
524 return sre_ucs2_search(state, pattern);
525 assert(state->charsize == 4);
526 return sre_ucs4_search(state, pattern);
527}
528
Larry Hastingsdf7c22b2014-01-07 14:25:26 -0800529/*[clinic input]
Larry Hastings16c51912014-01-07 11:53:01 -0800530module _sre
Larry Hastingsc2047262014-01-25 20:43:29 -0800531class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
Larry Hastings16c51912014-01-07 11:53:01 -0800532
533_sre.SRE_Pattern.match as pattern_match
534
Larry Hastings16c51912014-01-07 11:53:01 -0800535 pattern: object
536 pos: Py_ssize_t = 0
537 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
538
539Matches zero or more characters at the beginning of the string.
Larry Hastingsdf7c22b2014-01-07 14:25:26 -0800540[clinic start generated code]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800541
542PyDoc_STRVAR(pattern_match__doc__,
Larry Hastings581ee362014-01-28 05:00:08 -0800543"sig=($self, pattern, pos=0, endpos=sys.maxsize)\n"
Larry Hastings16c51912014-01-07 11:53:01 -0800544"Matches zero or more characters at the beginning of the string.");
545
546#define PATTERN_MATCH_METHODDEF \
547 {"match", (PyCFunction)pattern_match, METH_VARARGS|METH_KEYWORDS, pattern_match__doc__},
548
549static PyObject *
550pattern_match_impl(PatternObject *self, PyObject *pattern, Py_ssize_t pos, Py_ssize_t endpos);
551
/* Argument-parsing wrapper for match(): part of the clinic-generated code
   for this method.  Parses (pattern, pos=0, endpos=PY_SSIZE_T_MAX) from
   args/kwargs and forwards them to pattern_match_impl(); returns NULL if
   parsing fails. */
552static PyObject *
Larry Hastings5c661892014-01-24 06:17:25 -0800553pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
Larry Hastings16c51912014-01-07 11:53:01 -0800554{
555 PyObject *return_value = NULL;
556 static char *_keywords[] = {"pattern", "pos", "endpos", NULL};
557 PyObject *pattern;
558 Py_ssize_t pos = 0;
559 Py_ssize_t endpos = PY_SSIZE_T_MAX;
560
561 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
562 "O|nn:match", _keywords,
563 &pattern, &pos, &endpos))
564 goto exit;
Larry Hastings5c661892014-01-24 06:17:25 -0800565 return_value = pattern_match_impl(self, pattern, pos, endpos);
Larry Hastings16c51912014-01-07 11:53:01 -0800566
567exit:
568 return return_value;
569}
570
571static PyObject *
572pattern_match_impl(PatternObject *self, PyObject *pattern, Py_ssize_t pos, Py_ssize_t endpos)
Larry Hastings581ee362014-01-28 05:00:08 -0800573/*[clinic end generated code: output=9f5b785661677848 input=26f9fd31befe46b9]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000574{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100576 Py_ssize_t status;
Larry Hastings16c51912014-01-07 11:53:01 -0800577 PyObject *string;
Guido van Rossumb700df92000-03-31 14:59:30 +0000578
Larry Hastings16c51912014-01-07 11:53:01 -0800579 string = state_init(&state, (PatternObject *)self, pattern, pos, endpos);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000580 if (!string)
581 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000582
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000583 state.ptr = state.start;
584
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000585 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
586
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300587 status = sre_match(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000588
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000589 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Thomas Wouters89f507f2006-12-13 04:49:30 +0000590 if (PyErr_Occurred())
591 return NULL;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000592
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000593 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000594
Larry Hastings16c51912014-01-07 11:53:01 -0800595 return (PyObject *)pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000596}
597
598static PyObject*
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200599pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
600{
601 SRE_STATE state;
602 Py_ssize_t status;
603
604 PyObject* string;
605 Py_ssize_t start = 0;
606 Py_ssize_t end = PY_SSIZE_T_MAX;
607 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
608 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:fullmatch", kwlist,
609 &string, &start, &end))
610 return NULL;
611
612 string = state_init(&state, self, string, start, end);
613 if (!string)
614 return NULL;
615
616 state.match_all = 1;
617 state.ptr = state.start;
618
619 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
620
621 status = sre_match(&state, PatternObject_GetCode(self));
622
623 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
624 if (PyErr_Occurred())
625 return NULL;
626
627 state_fini(&state);
628
629 return pattern_new_match(self, &state, status);
630}
631
632static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000633pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000634{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000635 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100636 Py_ssize_t status;
Guido van Rossumb700df92000-03-31 14:59:30 +0000637
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000638 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000639 Py_ssize_t start = 0;
640 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000641 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000642 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000643 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000644 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000645
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000646 string = state_init(&state, self, string, start, end);
647 if (!string)
648 return NULL;
649
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000650 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
651
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300652 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000653
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000654 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
655
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000656 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000657
Thomas Wouters89f507f2006-12-13 04:49:30 +0000658 if (PyErr_Occurred())
659 return NULL;
660
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000661 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000662}
663
664static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000665call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000666{
667 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000668 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000669 PyObject* func;
670 PyObject* result;
671
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000672 if (!args)
673 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000674 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000675 if (!name)
676 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000677 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000678 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000679 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000680 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000681 func = PyObject_GetAttrString(mod, function);
682 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000683 if (!func)
684 return NULL;
685 result = PyObject_CallObject(func, args);
686 Py_DECREF(func);
687 Py_DECREF(args);
688 return result;
689}
690
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
 *
 * Returns 1 on success (the old reference is released and *object now
 * holds the copy) or 0 on failure (*object is left as it was).
 */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* duplicate;

    /* the tuple is handed straight to call(), which takes care of it */
    duplicate = call("copy", "deepcopy", PyTuple_Pack(2, *object, memo));
    if (duplicate == NULL) {
        return 0;
    }

    Py_DECREF(*object);
    *object = duplicate;
    return 1; /* success */
}
#endif
710
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000711static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000712pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000713{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000714 SRE_STATE state;
715 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100716 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000717 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000718
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000719 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000720 Py_ssize_t start = 0;
721 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000722 static char* kwlist[] = { "source", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000723 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000724 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000725 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000726
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 string = state_init(&state, self, string, start, end);
728 if (!string)
729 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000730
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000731 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000732 if (!list) {
733 state_fini(&state);
734 return NULL;
735 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000736
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000737 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000738
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000740
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000741 state_reset(&state);
742
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000743 state.ptr = state.start;
744
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300745 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300746 if (PyErr_Occurred())
747 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000748
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000749 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000750 if (status == 0)
751 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000752 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000753 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000754 }
Tim Peters3d563502006-01-21 02:47:53 +0000755
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000756 /* don't bother to build a match object */
757 switch (self->groups) {
758 case 0:
759 b = STATE_OFFSET(&state, state.start);
760 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300761 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300762 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000763 if (!item)
764 goto error;
765 break;
766 case 1:
767 item = state_getslice(&state, 1, string, 1);
768 if (!item)
769 goto error;
770 break;
771 default:
772 item = PyTuple_New(self->groups);
773 if (!item)
774 goto error;
775 for (i = 0; i < self->groups; i++) {
776 PyObject* o = state_getslice(&state, i+1, string, 1);
777 if (!o) {
778 Py_DECREF(item);
779 goto error;
780 }
781 PyTuple_SET_ITEM(item, i, o);
782 }
783 break;
784 }
785
786 status = PyList_Append(list, item);
787 Py_DECREF(item);
788 if (status < 0)
789 goto error;
790
791 if (state.ptr == state.start)
792 state.start = (void*) ((char*) state.ptr + state.charsize);
793 else
794 state.start = state.ptr;
795
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000796 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000797
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000798 state_fini(&state);
799 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000800
801error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000802 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000803 state_fini(&state);
804 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000805
Guido van Rossumb700df92000-03-31 14:59:30 +0000806}
807
Fredrik Lundh703ce812001-10-24 22:16:30 +0000808static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600809pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
Fredrik Lundh703ce812001-10-24 22:16:30 +0000810{
811 PyObject* scanner;
812 PyObject* search;
813 PyObject* iterator;
814
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600815 scanner = pattern_scanner(pattern, args, kw);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000816 if (!scanner)
817 return NULL;
818
819 search = PyObject_GetAttrString(scanner, "search");
820 Py_DECREF(scanner);
821 if (!search)
822 return NULL;
823
824 iterator = PyCallIter_New(search, Py_None);
825 Py_DECREF(search);
826
827 return iterator;
828}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000829
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000830static PyObject*
831pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
832{
833 SRE_STATE state;
834 PyObject* list;
835 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100836 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000837 Py_ssize_t n;
838 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000839 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000840
841 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000842 Py_ssize_t maxsplit = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000843 static char* kwlist[] = { "source", "maxsplit", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000844 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000845 &string, &maxsplit))
846 return NULL;
847
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000848 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000849 if (!string)
850 return NULL;
851
852 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000853 if (!list) {
854 state_fini(&state);
855 return NULL;
856 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000857
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000858 n = 0;
859 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000860
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000861 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000862
863 state_reset(&state);
864
865 state.ptr = state.start;
866
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300867 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300868 if (PyErr_Occurred())
869 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000870
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000871 if (status <= 0) {
872 if (status == 0)
873 break;
874 pattern_error(status);
875 goto error;
876 }
Tim Peters3d563502006-01-21 02:47:53 +0000877
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000878 if (state.start == state.ptr) {
879 if (last == state.end)
880 break;
881 /* skip one character */
882 state.start = (void*) ((char*) state.ptr + state.charsize);
883 continue;
884 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000885
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000886 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300887 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000888 string, STATE_OFFSET(&state, last),
889 STATE_OFFSET(&state, state.start)
890 );
891 if (!item)
892 goto error;
893 status = PyList_Append(list, item);
894 Py_DECREF(item);
895 if (status < 0)
896 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000897
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000898 /* add groups (if any) */
899 for (i = 0; i < self->groups; i++) {
900 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000901 if (!item)
902 goto error;
903 status = PyList_Append(list, item);
904 Py_DECREF(item);
905 if (status < 0)
906 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000907 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000908
909 n = n + 1;
910
911 last = state.start = state.ptr;
912
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000913 }
914
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000915 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300916 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000917 string, STATE_OFFSET(&state, last), state.endpos
918 );
919 if (!item)
920 goto error;
921 status = PyList_Append(list, item);
922 Py_DECREF(item);
923 if (status < 0)
924 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000925
926 state_fini(&state);
927 return list;
928
929error:
930 Py_DECREF(list);
931 state_fini(&state);
932 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000933
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000934}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000935
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000936static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000937pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000938 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000939{
940 SRE_STATE state;
941 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300942 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000943 PyObject* item;
944 PyObject* filter;
945 PyObject* args;
946 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000947 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100948 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000949 Py_ssize_t n;
950 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300951 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000952 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600953 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000954
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000955 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000956 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000957 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000958 Py_INCREF(filter);
959 filter_is_callable = 1;
960 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000961 /* if not callable, check if it's a literal string */
962 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600963 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300964 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200965 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000966 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300967 if (charsize == 1)
968 literal = memchr(ptr, '\\', n) == NULL;
969 else
970 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000971 } else {
972 PyErr_Clear();
973 literal = 0;
974 }
Benjamin Petersone48944b2012-03-07 14:50:25 -0600975 if (view.buf)
976 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000977 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000978 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000979 Py_INCREF(filter);
980 filter_is_callable = 0;
981 } else {
982 /* not a literal; hand it over to the template compiler */
983 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +0000984 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000985 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000986 );
987 if (!filter)
988 return NULL;
989 filter_is_callable = PyCallable_Check(filter);
990 }
Fredrik Lundhdac58492001-10-21 21:48:30 +0000991 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000992
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000993 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +0000994 if (!string) {
995 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000996 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +0000997 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000998
999 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001000 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001001 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001002 state_fini(&state);
1003 return NULL;
1004 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001005
1006 n = i = 0;
1007
1008 while (!count || n < count) {
1009
1010 state_reset(&state);
1011
1012 state.ptr = state.start;
1013
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001014 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001015 if (PyErr_Occurred())
1016 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001017
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001018 if (status <= 0) {
1019 if (status == 0)
1020 break;
1021 pattern_error(status);
1022 goto error;
1023 }
Tim Peters3d563502006-01-21 02:47:53 +00001024
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001025 b = STATE_OFFSET(&state, state.start);
1026 e = STATE_OFFSET(&state, state.ptr);
1027
1028 if (i < b) {
1029 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001030 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001031 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001032 if (!item)
1033 goto error;
1034 status = PyList_Append(list, item);
1035 Py_DECREF(item);
1036 if (status < 0)
1037 goto error;
1038
1039 } else if (i == b && i == e && n > 0)
1040 /* ignore empty match on latest position */
1041 goto next;
1042
1043 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001044 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001045 match = pattern_new_match(self, &state, 1);
1046 if (!match)
1047 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001048 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001049 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001050 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001051 goto error;
1052 }
1053 item = PyObject_CallObject(filter, args);
1054 Py_DECREF(args);
1055 Py_DECREF(match);
1056 if (!item)
1057 goto error;
1058 } else {
1059 /* filter is literal string */
1060 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001061 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001062 }
1063
1064 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001065 if (item != Py_None) {
1066 status = PyList_Append(list, item);
1067 Py_DECREF(item);
1068 if (status < 0)
1069 goto error;
1070 }
Tim Peters3d563502006-01-21 02:47:53 +00001071
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001072 i = e;
1073 n = n + 1;
1074
1075next:
1076 /* move on */
1077 if (state.ptr == state.start)
1078 state.start = (void*) ((char*) state.ptr + state.charsize);
1079 else
1080 state.start = state.ptr;
1081
1082 }
1083
1084 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001085 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001086 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001087 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001088 if (!item)
1089 goto error;
1090 status = PyList_Append(list, item);
1091 Py_DECREF(item);
1092 if (status < 0)
1093 goto error;
1094 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001095
1096 state_fini(&state);
1097
Guido van Rossum4e173842001-12-07 04:25:10 +00001098 Py_DECREF(filter);
1099
Fredrik Lundhdac58492001-10-21 21:48:30 +00001100 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001101 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001102 if (!joiner) {
1103 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001104 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001105 }
1106 if (PyList_GET_SIZE(list) == 0) {
1107 Py_DECREF(list);
1108 item = joiner;
1109 }
1110 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001111 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001112 item = _PyBytes_Join(joiner, list);
1113 else
1114 item = PyUnicode_Join(joiner, list);
1115 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001116 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001117 if (!item)
1118 return NULL;
1119 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001120
1121 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001122 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001123
1124 return item;
1125
1126error:
1127 Py_DECREF(list);
1128 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001129 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001130 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001131
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001132}
1133
1134static PyObject*
1135pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
1136{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001137 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001138 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001139 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001140 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001141 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001142 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001143 return NULL;
1144
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001145 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001146}
1147
1148static PyObject*
1149pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
1150{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001151 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001152 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001153 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001154 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001155 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001156 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001157 return NULL;
1158
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001159 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001160}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001161
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001162static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001163pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001164{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001165#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001166 PatternObject* copy;
1167 int offset;
1168
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001169 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1170 if (!copy)
1171 return NULL;
1172
1173 offset = offsetof(PatternObject, groups);
1174
1175 Py_XINCREF(self->groupindex);
1176 Py_XINCREF(self->indexgroup);
1177 Py_XINCREF(self->pattern);
1178
1179 memcpy((char*) copy + offset, (char*) self + offset,
1180 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001181 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001182
1183 return (PyObject*) copy;
1184#else
1185 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1186 return NULL;
1187#endif
1188}
1189
1190static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001191pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001192{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001193#ifdef USE_BUILTIN_COPY
1194 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001195
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001196 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001197 if (!copy)
1198 return NULL;
1199
1200 if (!deepcopy(&copy->groupindex, memo) ||
1201 !deepcopy(&copy->indexgroup, memo) ||
1202 !deepcopy(&copy->pattern, memo)) {
1203 Py_DECREF(copy);
1204 return NULL;
1205 }
1206
1207#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001208 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1209 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001210#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001211}
1212
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001213static PyObject *
1214pattern_repr(PatternObject *obj)
1215{
1216 static const struct {
1217 const char *name;
1218 int value;
1219 } flag_names[] = {
1220 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1221 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1222 {"re.LOCALE", SRE_FLAG_LOCALE},
1223 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1224 {"re.DOTALL", SRE_FLAG_DOTALL},
1225 {"re.UNICODE", SRE_FLAG_UNICODE},
1226 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1227 {"re.DEBUG", SRE_FLAG_DEBUG},
1228 {"re.ASCII", SRE_FLAG_ASCII},
1229 };
1230 PyObject *result = NULL;
1231 PyObject *flag_items;
1232 int i;
1233 int flags = obj->flags;
1234
1235 /* Omit re.UNICODE for valid string patterns. */
1236 if (obj->isbytes == 0 &&
1237 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1238 SRE_FLAG_UNICODE)
1239 flags &= ~SRE_FLAG_UNICODE;
1240
1241 flag_items = PyList_New(0);
1242 if (!flag_items)
1243 return NULL;
1244
1245 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1246 if (flags & flag_names[i].value) {
1247 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1248 if (!item)
1249 goto done;
1250
1251 if (PyList_Append(flag_items, item) < 0) {
1252 Py_DECREF(item);
1253 goto done;
1254 }
1255 Py_DECREF(item);
1256 flags &= ~flag_names[i].value;
1257 }
1258 }
1259 if (flags) {
1260 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1261 if (!item)
1262 goto done;
1263
1264 if (PyList_Append(flag_items, item) < 0) {
1265 Py_DECREF(item);
1266 goto done;
1267 }
1268 Py_DECREF(item);
1269 }
1270
1271 if (PyList_Size(flag_items) > 0) {
1272 PyObject *flags_result;
1273 PyObject *sep = PyUnicode_FromString("|");
1274 if (!sep)
1275 goto done;
1276 flags_result = PyUnicode_Join(sep, flag_items);
1277 Py_DECREF(sep);
1278 if (!flags_result)
1279 goto done;
1280 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1281 obj->pattern, flags_result);
1282 Py_DECREF(flags_result);
1283 }
1284 else {
1285 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1286 }
1287
1288done:
1289 Py_DECREF(flag_items);
1290 return result;
1291}
1292
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001293PyDoc_STRVAR(pattern_fullmatch_doc,
1294"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
1295 Matches against all of the string");
1296
Raymond Hettinger94478742004-09-24 04:31:19 +00001297PyDoc_STRVAR(pattern_search_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001298"search(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001299 Scan through string looking for a match, and return a corresponding\n\
Andrew Svetlov0b64c142012-12-25 18:48:54 +02001300 match object instance. Return None if no position in the string matches.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001301
1302PyDoc_STRVAR(pattern_split_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001303"split(string[, maxsplit = 0]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001304 Split string by the occurrences of pattern.");
1305
1306PyDoc_STRVAR(pattern_findall_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001307"findall(string[, pos[, endpos]]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001308 Return a list of all non-overlapping matches of pattern in string.");
1309
1310PyDoc_STRVAR(pattern_finditer_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001311"finditer(string[, pos[, endpos]]) -> iterator.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001312 Return an iterator over all non-overlapping matches for the \n\
1313 RE pattern in string. For each match, the iterator returns a\n\
1314 match object.");
1315
1316PyDoc_STRVAR(pattern_sub_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001317"sub(repl, string[, count = 0]) -> newstring.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001318 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00001319 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001320
1321PyDoc_STRVAR(pattern_subn_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001322"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001323 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
1324 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00001325 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001326
1327PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1328
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001329static PyMethodDef pattern_methods[] = {
Larry Hastings16c51912014-01-07 11:53:01 -08001330 PATTERN_MATCH_METHODDEF
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001331 {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
1332 pattern_fullmatch_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001333 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001334 pattern_search_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001335 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001336 pattern_sub_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001337 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001338 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001339 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001340 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001341 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001342 pattern_findall_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001343 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001344 pattern_finditer_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001345 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001346 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
1347 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001348 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001349};
1350
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00001351#define PAT_OFF(x) offsetof(PatternObject, x)
1352static PyMemberDef pattern_members[] = {
1353 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
1354 {"flags", T_INT, PAT_OFF(flags), READONLY},
1355 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
1356 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
1357 {NULL} /* Sentinel */
1358};
Guido van Rossumb700df92000-03-31 14:59:30 +00001359
Neal Norwitz57c179c2006-03-22 07:18:02 +00001360static PyTypeObject Pattern_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001361 PyVarObject_HEAD_INIT(NULL, 0)
1362 "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001363 sizeof(PatternObject), sizeof(SRE_CODE),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001364 (destructor)pattern_dealloc, /* tp_dealloc */
1365 0, /* tp_print */
1366 0, /* tp_getattr */
1367 0, /* tp_setattr */
1368 0, /* tp_reserved */
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001369 (reprfunc)pattern_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001370 0, /* tp_as_number */
1371 0, /* tp_as_sequence */
1372 0, /* tp_as_mapping */
1373 0, /* tp_hash */
1374 0, /* tp_call */
1375 0, /* tp_str */
1376 0, /* tp_getattro */
1377 0, /* tp_setattro */
1378 0, /* tp_as_buffer */
1379 Py_TPFLAGS_DEFAULT, /* tp_flags */
1380 pattern_doc, /* tp_doc */
1381 0, /* tp_traverse */
1382 0, /* tp_clear */
1383 0, /* tp_richcompare */
1384 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
1385 0, /* tp_iter */
1386 0, /* tp_iternext */
1387 pattern_methods, /* tp_methods */
1388 pattern_members, /* tp_members */
Guido van Rossumb700df92000-03-31 14:59:30 +00001389};
1390
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001391static int _validate(PatternObject *self); /* Forward */
1392
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001393static PyObject *
1394_compile(PyObject* self_, PyObject* args)
1395{
1396 /* "compile" pattern descriptor to pattern object */
1397
1398 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001399 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001400
1401 PyObject* pattern;
1402 int flags = 0;
1403 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001404 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001405 PyObject* groupindex = NULL;
1406 PyObject* indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001407
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001408 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001409 &PyList_Type, &code, &groups,
1410 &groupindex, &indexgroup))
1411 return NULL;
1412
1413 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001414 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001415 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1416 if (!self)
1417 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001418 self->weakreflist = NULL;
1419 self->pattern = NULL;
1420 self->groupindex = NULL;
1421 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001422
1423 self->codesize = n;
1424
1425 for (i = 0; i < n; i++) {
1426 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001427 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001428 self->code[i] = (SRE_CODE) value;
1429 if ((unsigned long) self->code[i] != value) {
1430 PyErr_SetString(PyExc_OverflowError,
1431 "regular expression code size limit exceeded");
1432 break;
1433 }
1434 }
1435
1436 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001437 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001438 return NULL;
1439 }
1440
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001441 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001442 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001443 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001444 else {
1445 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001446 int charsize;
1447 Py_buffer view;
1448 view.buf = NULL;
1449 if (!getstring(pattern, &p_length, &self->isbytes,
1450 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001451 Py_DECREF(self);
1452 return NULL;
1453 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001454 if (view.buf)
1455 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001456 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001457
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001458 Py_INCREF(pattern);
1459 self->pattern = pattern;
1460
1461 self->flags = flags;
1462
1463 self->groups = groups;
1464
1465 Py_XINCREF(groupindex);
1466 self->groupindex = groupindex;
1467
1468 Py_XINCREF(indexgroup);
1469 self->indexgroup = indexgroup;
1470
1471 self->weakreflist = NULL;
1472
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001473 if (!_validate(self)) {
1474 Py_DECREF(self);
1475 return NULL;
1476 }
1477
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001478 return (PyObject*) self;
1479}
1480
Guido van Rossumb700df92000-03-31 14:59:30 +00001481/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001482/* Code validation */
1483
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1505
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf argument list: VTRACE(("fmt", args)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the function that uses the macro */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the enclosing function: they read
   `code` and `end`, advance `code` by one word, and store the word read
   into `op`, `arg` or `skip` respectively.  Each of them FAILs when
   `code` has already reached `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Like GET_ARG, but for a skip word: additionally FAILs when
   skip - adj is larger than the distance from the skip word to `end`.
   adj is parenthesized so that an expression argument is subtracted
   as a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001546
1547static int
1548_validate_charset(SRE_CODE *code, SRE_CODE *end)
1549{
1550 /* Some variables are manipulated by the macros above */
1551 SRE_CODE op;
1552 SRE_CODE arg;
1553 SRE_CODE offset;
1554 int i;
1555
1556 while (code < end) {
1557 GET_OP;
1558 switch (op) {
1559
1560 case SRE_OP_NEGATE:
1561 break;
1562
1563 case SRE_OP_LITERAL:
1564 GET_ARG;
1565 break;
1566
1567 case SRE_OP_RANGE:
1568 GET_ARG;
1569 GET_ARG;
1570 break;
1571
1572 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001573 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001574 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001575 FAIL;
1576 code += offset;
1577 break;
1578
1579 case SRE_OP_BIGCHARSET:
1580 GET_ARG; /* Number of blocks */
1581 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001582 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001583 FAIL;
1584 /* Make sure that each byte points to a valid block */
1585 for (i = 0; i < 256; i++) {
1586 if (((unsigned char *)code)[i] >= arg)
1587 FAIL;
1588 }
1589 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001590 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001591 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001592 FAIL;
1593 code += offset;
1594 break;
1595
1596 case SRE_OP_CATEGORY:
1597 GET_ARG;
1598 switch (arg) {
1599 case SRE_CATEGORY_DIGIT:
1600 case SRE_CATEGORY_NOT_DIGIT:
1601 case SRE_CATEGORY_SPACE:
1602 case SRE_CATEGORY_NOT_SPACE:
1603 case SRE_CATEGORY_WORD:
1604 case SRE_CATEGORY_NOT_WORD:
1605 case SRE_CATEGORY_LINEBREAK:
1606 case SRE_CATEGORY_NOT_LINEBREAK:
1607 case SRE_CATEGORY_LOC_WORD:
1608 case SRE_CATEGORY_LOC_NOT_WORD:
1609 case SRE_CATEGORY_UNI_DIGIT:
1610 case SRE_CATEGORY_UNI_NOT_DIGIT:
1611 case SRE_CATEGORY_UNI_SPACE:
1612 case SRE_CATEGORY_UNI_NOT_SPACE:
1613 case SRE_CATEGORY_UNI_WORD:
1614 case SRE_CATEGORY_UNI_NOT_WORD:
1615 case SRE_CATEGORY_UNI_LINEBREAK:
1616 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1617 break;
1618 default:
1619 FAIL;
1620 }
1621 break;
1622
1623 default:
1624 FAIL;
1625
1626 }
1627 }
1628
1629 return 1;
1630}
1631
1632static int
1633_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1634{
1635 /* Some variables are manipulated by the macros above */
1636 SRE_CODE op;
1637 SRE_CODE arg;
1638 SRE_CODE skip;
1639
1640 VTRACE(("code=%p, end=%p\n", code, end));
1641
1642 if (code > end)
1643 FAIL;
1644
1645 while (code < end) {
1646 GET_OP;
1647 switch (op) {
1648
1649 case SRE_OP_MARK:
1650 /* We don't check whether marks are properly nested; the
1651 sre_match() code is robust even if they don't, and the worst
1652 you can get is nonsensical match results. */
1653 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001654 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001655 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1656 FAIL;
1657 }
1658 break;
1659
1660 case SRE_OP_LITERAL:
1661 case SRE_OP_NOT_LITERAL:
1662 case SRE_OP_LITERAL_IGNORE:
1663 case SRE_OP_NOT_LITERAL_IGNORE:
1664 GET_ARG;
1665 /* The arg is just a character, nothing to check */
1666 break;
1667
1668 case SRE_OP_SUCCESS:
1669 case SRE_OP_FAILURE:
1670 /* Nothing to check; these normally end the matching process */
1671 break;
1672
1673 case SRE_OP_AT:
1674 GET_ARG;
1675 switch (arg) {
1676 case SRE_AT_BEGINNING:
1677 case SRE_AT_BEGINNING_STRING:
1678 case SRE_AT_BEGINNING_LINE:
1679 case SRE_AT_END:
1680 case SRE_AT_END_LINE:
1681 case SRE_AT_END_STRING:
1682 case SRE_AT_BOUNDARY:
1683 case SRE_AT_NON_BOUNDARY:
1684 case SRE_AT_LOC_BOUNDARY:
1685 case SRE_AT_LOC_NON_BOUNDARY:
1686 case SRE_AT_UNI_BOUNDARY:
1687 case SRE_AT_UNI_NON_BOUNDARY:
1688 break;
1689 default:
1690 FAIL;
1691 }
1692 break;
1693
1694 case SRE_OP_ANY:
1695 case SRE_OP_ANY_ALL:
1696 /* These have no operands */
1697 break;
1698
1699 case SRE_OP_IN:
1700 case SRE_OP_IN_IGNORE:
1701 GET_SKIP;
1702 /* Stop 1 before the end; we check the FAILURE below */
1703 if (!_validate_charset(code, code+skip-2))
1704 FAIL;
1705 if (code[skip-2] != SRE_OP_FAILURE)
1706 FAIL;
1707 code += skip-1;
1708 break;
1709
1710 case SRE_OP_INFO:
1711 {
1712 /* A minimal info field is
1713 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1714 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1715 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001716 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001717 SRE_CODE *newcode;
1718 GET_SKIP;
1719 newcode = code+skip-1;
1720 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001721 GET_ARG;
1722 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001723 /* Check that only valid flags are present */
1724 if ((flags & ~(SRE_INFO_PREFIX |
1725 SRE_INFO_LITERAL |
1726 SRE_INFO_CHARSET)) != 0)
1727 FAIL;
1728 /* PREFIX and CHARSET are mutually exclusive */
1729 if ((flags & SRE_INFO_PREFIX) &&
1730 (flags & SRE_INFO_CHARSET))
1731 FAIL;
1732 /* LITERAL implies PREFIX */
1733 if ((flags & SRE_INFO_LITERAL) &&
1734 !(flags & SRE_INFO_PREFIX))
1735 FAIL;
1736 /* Validate the prefix */
1737 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001738 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001739 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001740 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001741 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001742 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001743 FAIL;
1744 code += prefix_len;
1745 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001746 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001747 FAIL;
1748 /* Each overlap value should be < prefix_len */
1749 for (i = 0; i < prefix_len; i++) {
1750 if (code[i] >= prefix_len)
1751 FAIL;
1752 }
1753 code += prefix_len;
1754 }
1755 /* Validate the charset */
1756 if (flags & SRE_INFO_CHARSET) {
1757 if (!_validate_charset(code, newcode-1))
1758 FAIL;
1759 if (newcode[-1] != SRE_OP_FAILURE)
1760 FAIL;
1761 code = newcode;
1762 }
1763 else if (code != newcode) {
1764 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1765 FAIL;
1766 }
1767 }
1768 break;
1769
1770 case SRE_OP_BRANCH:
1771 {
1772 SRE_CODE *target = NULL;
1773 for (;;) {
1774 GET_SKIP;
1775 if (skip == 0)
1776 break;
1777 /* Stop 2 before the end; we check the JUMP below */
1778 if (!_validate_inner(code, code+skip-3, groups))
1779 FAIL;
1780 code += skip-3;
1781 /* Check that it ends with a JUMP, and that each JUMP
1782 has the same target */
1783 GET_OP;
1784 if (op != SRE_OP_JUMP)
1785 FAIL;
1786 GET_SKIP;
1787 if (target == NULL)
1788 target = code+skip-1;
1789 else if (code+skip-1 != target)
1790 FAIL;
1791 }
1792 }
1793 break;
1794
1795 case SRE_OP_REPEAT_ONE:
1796 case SRE_OP_MIN_REPEAT_ONE:
1797 {
1798 SRE_CODE min, max;
1799 GET_SKIP;
1800 GET_ARG; min = arg;
1801 GET_ARG; max = arg;
1802 if (min > max)
1803 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001804 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001805 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001806 if (!_validate_inner(code, code+skip-4, groups))
1807 FAIL;
1808 code += skip-4;
1809 GET_OP;
1810 if (op != SRE_OP_SUCCESS)
1811 FAIL;
1812 }
1813 break;
1814
1815 case SRE_OP_REPEAT:
1816 {
1817 SRE_CODE min, max;
1818 GET_SKIP;
1819 GET_ARG; min = arg;
1820 GET_ARG; max = arg;
1821 if (min > max)
1822 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001823 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001824 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001825 if (!_validate_inner(code, code+skip-3, groups))
1826 FAIL;
1827 code += skip-3;
1828 GET_OP;
1829 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1830 FAIL;
1831 }
1832 break;
1833
1834 case SRE_OP_GROUPREF:
1835 case SRE_OP_GROUPREF_IGNORE:
1836 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001837 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001838 FAIL;
1839 break;
1840
1841 case SRE_OP_GROUPREF_EXISTS:
1842 /* The regex syntax for this is: '(?(group)then|else)', where
1843 'group' is either an integer group number or a group name,
1844 'then' and 'else' are sub-regexes, and 'else' is optional. */
1845 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001846 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001847 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001848 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001849 code--; /* The skip is relative to the first arg! */
1850 /* There are two possibilities here: if there is both a 'then'
1851 part and an 'else' part, the generated code looks like:
1852
1853 GROUPREF_EXISTS
1854 <group>
1855 <skipyes>
1856 ...then part...
1857 JUMP
1858 <skipno>
1859 (<skipyes> jumps here)
1860 ...else part...
1861 (<skipno> jumps here)
1862
1863 If there is only a 'then' part, it looks like:
1864
1865 GROUPREF_EXISTS
1866 <group>
1867 <skip>
1868 ...then part...
1869 (<skip> jumps here)
1870
1871 There is no direct way to decide which it is, and we don't want
1872 to allow arbitrary jumps anywhere in the code; so we just look
1873 for a JUMP opcode preceding our skip target.
1874 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001875 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001876 code[skip-3] == SRE_OP_JUMP)
1877 {
1878 VTRACE(("both then and else parts present\n"));
1879 if (!_validate_inner(code+1, code+skip-3, groups))
1880 FAIL;
1881 code += skip-2; /* Position after JUMP, at <skipno> */
1882 GET_SKIP;
1883 if (!_validate_inner(code, code+skip-1, groups))
1884 FAIL;
1885 code += skip-1;
1886 }
1887 else {
1888 VTRACE(("only a then part present\n"));
1889 if (!_validate_inner(code+1, code+skip-1, groups))
1890 FAIL;
1891 code += skip-1;
1892 }
1893 break;
1894
1895 case SRE_OP_ASSERT:
1896 case SRE_OP_ASSERT_NOT:
1897 GET_SKIP;
1898 GET_ARG; /* 0 for lookahead, width for lookbehind */
1899 code--; /* Back up over arg to simplify math below */
1900 if (arg & 0x80000000)
1901 FAIL; /* Width too large */
1902 /* Stop 1 before the end; we check the SUCCESS below */
1903 if (!_validate_inner(code+1, code+skip-2, groups))
1904 FAIL;
1905 code += skip-2;
1906 GET_OP;
1907 if (op != SRE_OP_SUCCESS)
1908 FAIL;
1909 break;
1910
1911 default:
1912 FAIL;
1913
1914 }
1915 }
1916
1917 VTRACE(("okay\n"));
1918 return 1;
1919}
1920
1921static int
1922_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1923{
1924 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
1925 FAIL;
1926 if (groups == 0) /* fix for simplejson */
1927 groups = 100; /* 100 groups should always be safe */
1928 return _validate_inner(code, end-1, groups);
1929}
1930
1931static int
1932_validate(PatternObject *self)
1933{
1934 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1935 {
1936 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1937 return 0;
1938 }
1939 else
1940 VTRACE(("Success!\n"));
1941 return 1;
1942}
1943
1944/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001945/* match methods */
1946
1947static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001948match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001949{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001950 Py_XDECREF(self->regs);
1951 Py_XDECREF(self->string);
1952 Py_DECREF(self->pattern);
1953 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001954}
1955
1956static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001957match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001958{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001959 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001960 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001961 Py_buffer view;
1962 PyObject *result;
1963 void* ptr;
1964
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001965 if (index < 0 || index >= self->groups) {
1966 /* raise IndexError if we were given a bad group number */
1967 PyErr_SetString(
1968 PyExc_IndexError,
1969 "no such group"
1970 );
1971 return NULL;
1972 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001973
Fredrik Lundh6f013982000-07-03 18:44:21 +00001974 index *= 2;
1975
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001976 if (self->string == Py_None || self->mark[index] < 0) {
1977 /* return default value if the string or group is undefined */
1978 Py_INCREF(def);
1979 return def;
1980 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001981
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001982 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001983 if (ptr == NULL)
1984 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001985 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001986 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001987 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001988 PyBuffer_Release(&view);
1989 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001990}
1991
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001992static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001993match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001994{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001995 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001996
Guido van Rossumddefaf32007-01-14 03:31:43 +00001997 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001998 /* Default value */
1999 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002000
Christian Heimes217cfd12007-12-02 14:31:20 +00002001 if (PyLong_Check(index))
2002 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002003
Fredrik Lundh6f013982000-07-03 18:44:21 +00002004 i = -1;
2005
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002006 if (self->pattern->groupindex) {
2007 index = PyObject_GetItem(self->pattern->groupindex, index);
2008 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002009 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002010 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002011 Py_DECREF(index);
2012 } else
2013 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002014 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002015
2016 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002017}
2018
2019static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002020match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002021{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002022 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002023}
2024
2025static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002026match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002027{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002028 /* delegate to Python code */
2029 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002030 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002031 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002032 );
2033}
2034
2035static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002036match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002037{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002038 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002039 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002040
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002041 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002042
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002043 switch (size) {
2044 case 0:
2045 result = match_getslice(self, Py_False, Py_None);
2046 break;
2047 case 1:
2048 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2049 break;
2050 default:
2051 /* fetch multiple items */
2052 result = PyTuple_New(size);
2053 if (!result)
2054 return NULL;
2055 for (i = 0; i < size; i++) {
2056 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002057 self, PyTuple_GET_ITEM(args, i), Py_None
2058 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002059 if (!item) {
2060 Py_DECREF(result);
2061 return NULL;
2062 }
2063 PyTuple_SET_ITEM(result, i, item);
2064 }
2065 break;
2066 }
2067 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002068}
2069
2070static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002071match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002072{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002073 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002074 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002075
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002076 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002077 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00002078 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002079 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002080
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002081 result = PyTuple_New(self->groups-1);
2082 if (!result)
2083 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002084
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002085 for (index = 1; index < self->groups; index++) {
2086 PyObject* item;
2087 item = match_getslice_by_index(self, index, def);
2088 if (!item) {
2089 Py_DECREF(result);
2090 return NULL;
2091 }
2092 PyTuple_SET_ITEM(result, index-1, item);
2093 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002094
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002095 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002096}
2097
2098static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002099match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002100{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002101 PyObject* result;
2102 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002103 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002104
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002105 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002106 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002107 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002108 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002109
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002110 result = PyDict_New();
2111 if (!result || !self->pattern->groupindex)
2112 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002113
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002114 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002115 if (!keys)
2116 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002117
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002118 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002119 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002120 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002121 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002122 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002123 if (!key)
2124 goto failed;
2125 value = match_getslice(self, key, def);
2126 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002127 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002128 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002129 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002130 status = PyDict_SetItem(result, key, value);
2131 Py_DECREF(value);
2132 if (status < 0)
2133 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002134 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002135
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002136 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002137
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002138 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002139
2140failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002141 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002142 Py_DECREF(result);
2143 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002144}
2145
2146static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002147match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002148{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002149 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002150
Guido van Rossumddefaf32007-01-14 03:31:43 +00002151 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002152 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002153 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002154
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002155 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002157 if (index < 0 || index >= self->groups) {
2158 PyErr_SetString(
2159 PyExc_IndexError,
2160 "no such group"
2161 );
2162 return NULL;
2163 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002164
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002165 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002166 return PyLong_FromSsize_t(self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002167}
2168
2169static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002170match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002171{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002172 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002173
Guido van Rossumddefaf32007-01-14 03:31:43 +00002174 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002175 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002176 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002177
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002178 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002179
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002180 if (index < 0 || index >= self->groups) {
2181 PyErr_SetString(
2182 PyExc_IndexError,
2183 "no such group"
2184 );
2185 return NULL;
2186 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002187
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002188 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002189 return PyLong_FromSsize_t(self->mark[index*2+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002190}
2191
2192LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002193_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002194{
2195 PyObject* pair;
2196 PyObject* item;
2197
2198 pair = PyTuple_New(2);
2199 if (!pair)
2200 return NULL;
2201
Christian Heimes217cfd12007-12-02 14:31:20 +00002202 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002203 if (!item)
2204 goto error;
2205 PyTuple_SET_ITEM(pair, 0, item);
2206
Christian Heimes217cfd12007-12-02 14:31:20 +00002207 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002208 if (!item)
2209 goto error;
2210 PyTuple_SET_ITEM(pair, 1, item);
2211
2212 return pair;
2213
2214 error:
2215 Py_DECREF(pair);
2216 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002217}
2218
2219static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002220match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002221{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002222 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002223
Guido van Rossumddefaf32007-01-14 03:31:43 +00002224 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002225 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002226 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002227
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002228 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002229
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002230 if (index < 0 || index >= self->groups) {
2231 PyErr_SetString(
2232 PyExc_IndexError,
2233 "no such group"
2234 );
2235 return NULL;
2236 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002237
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002238 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002239 return _pair(self->mark[index*2], self->mark[index*2+1]);
2240}
2241
2242static PyObject*
2243match_regs(MatchObject* self)
2244{
2245 PyObject* regs;
2246 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002247 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002248
2249 regs = PyTuple_New(self->groups);
2250 if (!regs)
2251 return NULL;
2252
2253 for (index = 0; index < self->groups; index++) {
2254 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2255 if (!item) {
2256 Py_DECREF(regs);
2257 return NULL;
2258 }
2259 PyTuple_SET_ITEM(regs, index, item);
2260 }
2261
2262 Py_INCREF(regs);
2263 self->regs = regs;
2264
2265 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002266}
2267
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002268static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002269match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002270{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002271#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002272 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002273 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002274
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002275 slots = 2 * (self->pattern->groups+1);
2276
2277 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2278 if (!copy)
2279 return NULL;
2280
2281 /* this value a constant, but any compiler should be able to
2282 figure that out all by itself */
2283 offset = offsetof(MatchObject, string);
2284
2285 Py_XINCREF(self->pattern);
2286 Py_XINCREF(self->string);
2287 Py_XINCREF(self->regs);
2288
2289 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002290 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002291
2292 return (PyObject*) copy;
2293#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002294 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002295 return NULL;
2296#endif
2297}
2298
2299static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002300match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002301{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002302#ifdef USE_BUILTIN_COPY
2303 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002304
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002305 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002306 if (!copy)
2307 return NULL;
2308
2309 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2310 !deepcopy(&copy->string, memo) ||
2311 !deepcopy(&copy->regs, memo)) {
2312 Py_DECREF(copy);
2313 return NULL;
2314 }
2315
2316#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002317 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2318 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002319#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002320}
2321
/* Docstrings for match objects: match_doc is used as the tp_doc slot
   of Match_Type, the others are attached to the corresponding entries
   of the match_methods table. */
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002322PyDoc_STRVAR(match_doc,
2323"The result of re.match() and re.search().\n\
2324Match objects always have a boolean value of True.");
2325
2326PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002327"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002328 Return subgroup(s) of the match by indices or names.\n\
2329 For 0 returns the entire match.");
2330
2331PyDoc_STRVAR(match_start_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002332"start([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002333 Return index of the start of the substring matched by group.");
2334
2335PyDoc_STRVAR(match_end_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002336"end([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002337 Return index of the end of the substring matched by group.");
2338
2339PyDoc_STRVAR(match_span_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002340"span([group]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002341 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
2342
2343PyDoc_STRVAR(match_groups_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002344"groups([default=None]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002345 Return a tuple containing all the subgroups of the match, from 1.\n\
2346 The default argument is used for groups\n\
2347 that did not participate in the match");
2348
2349PyDoc_STRVAR(match_groupdict_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002350"groupdict([default=None]) -> dict.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002351 Return a dictionary containing all the named subgroups of the match,\n\
2352 keyed by the subgroup name. The default argument is used for groups\n\
2353 that did not participate in the match");
2354
2355PyDoc_STRVAR(match_expand_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002356"expand(template) -> str.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002357 Return the string obtained by doing backslash substitution\n\
2358 on the string template, as done by the sub() method.");
2359
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002360static PyMethodDef match_methods[] = {
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002361 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2362 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
2363 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
2364 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
2365 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
2366 match_groups_doc},
2367 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
2368 match_groupdict_doc},
2369 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002370 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
2371 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002372 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002373};
2374
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002375static PyObject *
2376match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002377{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002378 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002379 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002380 Py_INCREF(Py_None);
2381 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002382}
2383
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002384static PyObject *
2385match_lastgroup_get(MatchObject *self)
2386{
2387 if (self->pattern->indexgroup && self->lastindex >= 0) {
2388 PyObject* result = PySequence_GetItem(
2389 self->pattern->indexgroup, self->lastindex
2390 );
2391 if (result)
2392 return result;
2393 PyErr_Clear();
2394 }
2395 Py_INCREF(Py_None);
2396 return Py_None;
2397}
2398
2399static PyObject *
2400match_regs_get(MatchObject *self)
2401{
2402 if (self->regs) {
2403 Py_INCREF(self->regs);
2404 return self->regs;
2405 } else
2406 return match_regs(self);
2407}
2408
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002409static PyObject *
2410match_repr(MatchObject *self)
2411{
2412 PyObject *result;
2413 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2414 if (group0 == NULL)
2415 return NULL;
2416 result = PyUnicode_FromFormat(
2417 "<%s object; span=(%d, %d), match=%.50R>",
2418 Py_TYPE(self)->tp_name,
2419 self->mark[0], self->mark[1], group0);
2420 Py_DECREF(group0);
2421 return result;
2422}
2423
2424
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002425static PyGetSetDef match_getset[] = {
2426 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2427 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2428 {"regs", (getter)match_regs_get, (setter)NULL},
2429 {NULL}
2430};
2431
2432#define MATCH_OFF(x) offsetof(MatchObject, x)
2433static PyMemberDef match_members[] = {
2434 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2435 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2436 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2437 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2438 {NULL}
2439};
2440
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2443
Neal Norwitz57c179c2006-03-22 07:18:02 +00002444static PyTypeObject Match_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002445 PyVarObject_HEAD_INIT(NULL,0)
2446 "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002447 sizeof(MatchObject), sizeof(Py_ssize_t),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002448 (destructor)match_dealloc, /* tp_dealloc */
2449 0, /* tp_print */
2450 0, /* tp_getattr */
2451 0, /* tp_setattr */
2452 0, /* tp_reserved */
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002453 (reprfunc)match_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002454 0, /* tp_as_number */
2455 0, /* tp_as_sequence */
2456 0, /* tp_as_mapping */
2457 0, /* tp_hash */
2458 0, /* tp_call */
2459 0, /* tp_str */
2460 0, /* tp_getattro */
2461 0, /* tp_setattro */
2462 0, /* tp_as_buffer */
2463 Py_TPFLAGS_DEFAULT, /* tp_flags */
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002464 match_doc, /* tp_doc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002465 0, /* tp_traverse */
2466 0, /* tp_clear */
2467 0, /* tp_richcompare */
2468 0, /* tp_weaklistoffset */
2469 0, /* tp_iter */
2470 0, /* tp_iternext */
2471 match_methods, /* tp_methods */
2472 match_members, /* tp_members */
2473 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00002474};
2475
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002476static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002477pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002478{
2479 /* create match object (from state object) */
2480
2481 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002482 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002483 char* base;
2484 int n;
2485
2486 if (status > 0) {
2487
2488 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002489 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002490 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2491 2*(pattern->groups+1));
2492 if (!match)
2493 return NULL;
2494
2495 Py_INCREF(pattern);
2496 match->pattern = pattern;
2497
2498 Py_INCREF(state->string);
2499 match->string = state->string;
2500
2501 match->regs = NULL;
2502 match->groups = pattern->groups+1;
2503
2504 /* fill in group slices */
2505
2506 base = (char*) state->beginning;
2507 n = state->charsize;
2508
2509 match->mark[0] = ((char*) state->start - base) / n;
2510 match->mark[1] = ((char*) state->ptr - base) / n;
2511
2512 for (i = j = 0; i < pattern->groups; i++, j+=2)
2513 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2514 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2515 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2516 } else
2517 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2518
2519 match->pos = state->pos;
2520 match->endpos = state->endpos;
2521
2522 match->lastindex = state->lastindex;
2523
2524 return (PyObject*) match;
2525
2526 } else if (status == 0) {
2527
2528 /* no match */
2529 Py_INCREF(Py_None);
2530 return Py_None;
2531
2532 }
2533
2534 /* internal error */
2535 pattern_error(status);
2536 return NULL;
2537}
2538
2539
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002540/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002541/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002542
2543static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002544scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002545{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002546 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002547 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002548 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002549}
2550
2551static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002552scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002553{
2554 SRE_STATE* state = &self->state;
2555 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002556 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002557
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002558 state_reset(state);
2559
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002560 state->ptr = state->start;
2561
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002562 status = sre_match(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002563 if (PyErr_Occurred())
2564 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002565
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002566 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002567 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002568
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002569 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002570 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002571 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002572 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002573
2574 return match;
2575}
2576
2577
2578static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002579scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002580{
2581 SRE_STATE* state = &self->state;
2582 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002583 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002584
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002585 state_reset(state);
2586
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002587 state->ptr = state->start;
2588
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002589 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002590 if (PyErr_Occurred())
2591 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002592
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002593 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002594 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002595
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002596 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002597 state->start = (void*) ((char*) state->ptr + state->charsize);
2598 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002599 state->start = state->ptr;
2600
2601 return match;
2602}
2603
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002604static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002605 {"match", (PyCFunction) scanner_match, METH_NOARGS},
2606 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002607 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002608};
2609
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002610#define SCAN_OFF(x) offsetof(ScannerObject, x)
2611static PyMemberDef scanner_members[] = {
Ezio Melotti7c8c1ea2011-09-29 01:00:19 +03002612 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002613 {NULL} /* Sentinel */
2614};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002615
Neal Norwitz57c179c2006-03-22 07:18:02 +00002616static PyTypeObject Scanner_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002617 PyVarObject_HEAD_INIT(NULL, 0)
2618 "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002619 sizeof(ScannerObject), 0,
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002620 (destructor)scanner_dealloc,/* tp_dealloc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002621 0, /* tp_print */
2622 0, /* tp_getattr */
2623 0, /* tp_setattr */
2624 0, /* tp_reserved */
2625 0, /* tp_repr */
2626 0, /* tp_as_number */
2627 0, /* tp_as_sequence */
2628 0, /* tp_as_mapping */
2629 0, /* tp_hash */
2630 0, /* tp_call */
2631 0, /* tp_str */
2632 0, /* tp_getattro */
2633 0, /* tp_setattro */
2634 0, /* tp_as_buffer */
2635 Py_TPFLAGS_DEFAULT, /* tp_flags */
2636 0, /* tp_doc */
2637 0, /* tp_traverse */
2638 0, /* tp_clear */
2639 0, /* tp_richcompare */
2640 0, /* tp_weaklistoffset */
2641 0, /* tp_iter */
2642 0, /* tp_iternext */
2643 scanner_methods, /* tp_methods */
2644 scanner_members, /* tp_members */
2645 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002646};
2647
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002648static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002649pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kw)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002650{
2651 /* create search state object */
2652
2653 ScannerObject* self;
2654
2655 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002656 Py_ssize_t start = 0;
2657 Py_ssize_t end = PY_SSIZE_T_MAX;
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002658 static char* kwlist[] = { "source", "pos", "endpos", NULL };
2659 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:scanner", kwlist,
2660 &string, &start, &end))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002661 return NULL;
2662
2663 /* create scanner object */
2664 self = PyObject_NEW(ScannerObject, &Scanner_Type);
2665 if (!self)
2666 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002667 self->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002668
2669 string = state_init(&self->state, pattern, string, start, end);
2670 if (!string) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002671 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002672 return NULL;
2673 }
2674
2675 Py_INCREF(pattern);
2676 self->pattern = (PyObject*) pattern;
2677
2678 return (PyObject*) self;
2679}
2680
Guido van Rossumb700df92000-03-31 14:59:30 +00002681static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002682 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002683 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00002684 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002685 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002686};
2687
Martin v. Löwis1a214512008-06-11 05:26:20 +00002688static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002689 PyModuleDef_HEAD_INIT,
2690 "_" SRE_MODULE,
2691 NULL,
2692 -1,
2693 _functions,
2694 NULL,
2695 NULL,
2696 NULL,
2697 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002698};
2699
2700PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002701{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002702 PyObject* m;
2703 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002704 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002705
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002706 /* Patch object types */
2707 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2708 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002709 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002710
Martin v. Löwis1a214512008-06-11 05:26:20 +00002711 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002712 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002713 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002714 d = PyModule_GetDict(m);
2715
Christian Heimes217cfd12007-12-02 14:31:20 +00002716 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002717 if (x) {
2718 PyDict_SetItemString(d, "MAGIC", x);
2719 Py_DECREF(x);
2720 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002721
Christian Heimes217cfd12007-12-02 14:31:20 +00002722 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002723 if (x) {
2724 PyDict_SetItemString(d, "CODESIZE", x);
2725 Py_DECREF(x);
2726 }
2727
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002728 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2729 if (x) {
2730 PyDict_SetItemString(d, "MAXREPEAT", x);
2731 Py_DECREF(x);
2732 }
2733
Neal Norwitzfe537132007-08-26 03:55:15 +00002734 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002735 if (x) {
2736 PyDict_SetItemString(d, "copyright", x);
2737 Py_DECREF(x);
2738 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002739 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002740}
2741
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002742/* vim:ts=4:sw=4:et
2743*/