blob: d4d1d9d0eb3c597c7244066e01a744fe017ba2ce [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000038static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000039 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
65/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000066#define USE_FAST_SEARCH
67
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000068/* enables copy/deepcopy handling (work in progress) */
69#undef USE_BUILTIN_COPY
70
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000071/* -------------------------------------------------------------------- */
72
Fredrik Lundh80946112000-06-29 18:03:25 +000073#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000074#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000075#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000076/* fastest possible local call under MSVC */
77#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000078#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000079#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000080#else
81#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000082#endif
83
84/* error codes */
85#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000086#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000087#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000088#define SRE_ERROR_MEMORY -9 /* out of memory */
Christian Heimes2380ac72008-01-09 00:17:24 +000089#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000090
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000091#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000092#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000093#else
94#define TRACE(v)
95#endif
96
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000097/* -------------------------------------------------------------------- */
98/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000099
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000100/* default character predicates (run sre_chars.py to regenerate tables) */
101
102#define SRE_DIGIT_MASK 1
103#define SRE_SPACE_MASK 2
104#define SRE_LINEBREAK_MASK 4
105#define SRE_ALNUM_MASK 8
106#define SRE_WORD_MASK 16
107
Fredrik Lundh21009b92001-09-18 18:47:09 +0000108/* FIXME: this assumes ASCII. create tables in init_sre() instead */
109
/* Character-class bitmask for each of the 128 ASCII codes, indexed by
   character code.  Every entry is an OR of the SRE_*_MASK values
   (1 digit, 2 space, 4 linebreak, 8 alnum, 16 word).  One row holds 16
   consecutive codes. */
static char sre_char_info[128] = {
    /* 0x00 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  6,  2,  2,  2,  0,  0,
    /* 0x10 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x20 */  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,  0,  0,  0,  0,  0,  0,
    /* 0x40 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0, 16,
    /* 0x60 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0,  0
};
117
/* ASCII lower-casing table, indexed by character code: codes 65..90
   ('A'..'Z') map to 97..122 ('a'..'z'); every other code maps to itself.
   One row holds 16 consecutive codes. */
static char sre_char_lower[128] = {
    /* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,
                 8,   9,  10,  11,  12,  13,  14,  15,
    /* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,
                24,  25,  26,  27,  28,  29,  30,  31,
    /* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,
                40,  41,  42,  43,  44,  45,  46,  47,
    /* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,
                56,  57,  58,  59,  60,  61,  62,  63,
    /* 0x40 */  64,  97,  98,  99, 100, 101, 102, 103,
               104, 105, 106, 107, 108, 109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119,
               120, 121, 122,  91,  92,  93,  94,  95,
    /* 0x60 */  96,  97,  98,  99, 100, 101, 102, 103,
               104, 105, 106, 107, 108, 109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119,
               120, 121, 122, 123, 124, 125, 126, 127
};
127
/* ASCII-only character predicates.  Each one expands to the selected
   sre_char_info mask bit (non-zero) when ch is below 128 and carries that
   bit, and to 0 otherwise -- in particular for every ch >= 128. */
#define SRE_CHAR_INFO_BIT(ch, mask)\
    ((ch) < 128 ? (sre_char_info[(ch)] & (mask)) : 0)

#define SRE_IS_DIGIT(ch) SRE_CHAR_INFO_BIT(ch, SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch) SRE_CHAR_INFO_BIT(ch, SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch) SRE_CHAR_INFO_BIT(ch, SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch) SRE_CHAR_INFO_BIT(ch, SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch) SRE_CHAR_INFO_BIT(ch, SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000138
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000139static unsigned int sre_lower(unsigned int ch)
140{
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000141 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000142}
143
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000144/* locale-specific character predicates */
/* ((c & ~N) == 0) is the same test as (c < N+1) for any unsigned c; the
 * mask form avoids warnings when c's type supports only numbers < N+1.
 * Characters outside 0..255 are never passed to isalnum(). */
#define SRE_LOC_IS_ALNUM(ch) ((((ch) & ~255) == 0) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) ((ch) == '_' || SRE_LOC_IS_ALNUM((ch)))
149
/* Lower-case ch with the C library's tolower() (current locale) when it
   is below 256; larger codes are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower((ch));
}
154
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000155/* unicode-specific character predicates */
156
/* Unicode predicates: direct aliases for the Py_UNICODE_* classification
   macros.  A "word" character is an underscore or anything alphanumeric. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) ((ch) == '_' || SRE_UNI_IS_ALNUM(ch))
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000162
/* Lower-case ch via Py_UNICODE_TOLOWER and return the result as an
   unsigned int. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
167
Guido van Rossumb700df92000-03-31 14:59:30 +0000168LOCAL(int)
169sre_category(SRE_CODE category, unsigned int ch)
170{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000171 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000172
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000173 case SRE_CATEGORY_DIGIT:
174 return SRE_IS_DIGIT(ch);
175 case SRE_CATEGORY_NOT_DIGIT:
176 return !SRE_IS_DIGIT(ch);
177 case SRE_CATEGORY_SPACE:
178 return SRE_IS_SPACE(ch);
179 case SRE_CATEGORY_NOT_SPACE:
180 return !SRE_IS_SPACE(ch);
181 case SRE_CATEGORY_WORD:
182 return SRE_IS_WORD(ch);
183 case SRE_CATEGORY_NOT_WORD:
184 return !SRE_IS_WORD(ch);
185 case SRE_CATEGORY_LINEBREAK:
186 return SRE_IS_LINEBREAK(ch);
187 case SRE_CATEGORY_NOT_LINEBREAK:
188 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000189
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000190 case SRE_CATEGORY_LOC_WORD:
191 return SRE_LOC_IS_WORD(ch);
192 case SRE_CATEGORY_LOC_NOT_WORD:
193 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000194
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 case SRE_CATEGORY_UNI_DIGIT:
196 return SRE_UNI_IS_DIGIT(ch);
197 case SRE_CATEGORY_UNI_NOT_DIGIT:
198 return !SRE_UNI_IS_DIGIT(ch);
199 case SRE_CATEGORY_UNI_SPACE:
200 return SRE_UNI_IS_SPACE(ch);
201 case SRE_CATEGORY_UNI_NOT_SPACE:
202 return !SRE_UNI_IS_SPACE(ch);
203 case SRE_CATEGORY_UNI_WORD:
204 return SRE_UNI_IS_WORD(ch);
205 case SRE_CATEGORY_UNI_NOT_WORD:
206 return !SRE_UNI_IS_WORD(ch);
207 case SRE_CATEGORY_UNI_LINEBREAK:
208 return SRE_UNI_IS_LINEBREAK(ch);
209 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
210 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000211 }
212 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000213}
214
215/* helpers */
216
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000217static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000218data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000219{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000220 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000222 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000223 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000224 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225}
226
227static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000228data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000229{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000230 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000231 minsize = state->data_stack_base+size;
232 cursize = state->data_stack_size;
233 if (cursize < minsize) {
234 void* stack;
235 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300236 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000237 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000238 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000239 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000240 return SRE_ERROR_MEMORY;
241 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000242 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000243 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000244 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000245 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000246}
247
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000248/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000249
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300250#define SRE_CHAR Py_UCS1
251#define SIZEOF_SRE_CHAR 1
252#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300253#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000254
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300255/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000256
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300257#define SRE_CHAR Py_UCS2
258#define SIZEOF_SRE_CHAR 2
259#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300260#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000261
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300262/* generate 32-bit unicode version */
263
264#define SRE_CHAR Py_UCS4
265#define SIZEOF_SRE_CHAR 4
266#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300267#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000268
269/* -------------------------------------------------------------------- */
270/* factories and destructors */
271
272/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100273static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600274static PyObject*pattern_scanner(PatternObject*, PyObject*, PyObject* kw);
Guido van Rossumb700df92000-03-31 14:59:30 +0000275
276static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000277sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +0000278{
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100279 return PyLong_FromSize_t(sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +0000280}
281
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000282static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +0000283sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000284{
285 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000286 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000287 return NULL;
288 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000289 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000290 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000291 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundhb389df32000-06-29 12:48:37 +0000292 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000293}
294
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000295LOCAL(void)
296state_reset(SRE_STATE* state)
297{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000298 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000299 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000300
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000301 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000302 state->lastindex = -1;
303
304 state->repeat = NULL;
305
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000306 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307}
308
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000309static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200310getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300311 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600312 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000313{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000314 /* given a python object, return a data pointer, a length (in
315 characters), and a character size. return NULL if the object
316 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000317
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000318 /* Unicode objects do not support the buffer API. So, get the data
319 directly instead. */
320 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200321 if (PyUnicode_READY(string) == -1)
322 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200323 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200324 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300325 *p_isbytes = 0;
326 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000327 }
328
Victor Stinner0058b862011-09-29 03:27:47 +0200329 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300330 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
331 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
332 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000333 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000334
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300335 *p_length = view->len;
336 *p_charsize = 1;
337 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000338
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300339 if (view->buf == NULL) {
340 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
341 PyBuffer_Release(view);
342 view->buf = NULL;
343 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000344 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300345 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000346}
347
348LOCAL(PyObject*)
349state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000350 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000351{
352 /* prepare state object */
353
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000354 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300355 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000356 void* ptr;
357
358 memset(state, 0, sizeof(SRE_STATE));
359
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000360 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000361 state->lastindex = -1;
362
Benjamin Petersone48944b2012-03-07 14:50:25 -0600363 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300364 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000365 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600366 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000367
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300368 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600369 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300370 "can't use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600371 goto err;
372 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300373 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600374 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300375 "can't use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600376 goto err;
377 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000378
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000379 /* adjust boundaries */
380 if (start < 0)
381 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000382 else if (start > length)
383 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000384
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000385 if (end < 0)
386 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000387 else if (end > length)
388 end = length;
389
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300390 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000391 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000395 state->start = (void*) ((char*) ptr + start * state->charsize);
396 state->end = (void*) ((char*) ptr + end * state->charsize);
397
398 Py_INCREF(string);
399 state->string = string;
400 state->pos = start;
401 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000402
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000403 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000404 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000405 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000406 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000407 else
Fredrik Lundhb389df32000-06-29 12:48:37 +0000408 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000409
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000410 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600411 err:
412 if (state->buffer.buf)
413 PyBuffer_Release(&state->buffer);
414 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000415}
416
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000417LOCAL(void)
418state_fini(SRE_STATE* state)
419{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600420 if (state->buffer.buf)
421 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000422 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000423 data_stack_dealloc(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000424}
425
/* calculate offset from start of string: the byte distance between
   `member` and the state's `beginning`, divided by the state's character
   size, i.e. an offset counted in characters */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
429
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000430LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300431getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300432 PyObject* string, Py_ssize_t start, Py_ssize_t end)
433{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300434 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300435 if (PyBytes_CheckExact(string) &&
436 start == 0 && end == PyBytes_GET_SIZE(string)) {
437 Py_INCREF(string);
438 return string;
439 }
440 return PyBytes_FromStringAndSize(
441 (const char *)ptr + start, end - start);
442 }
443 else {
444 return PyUnicode_Substring(string, start, end);
445 }
446}
447
448LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000449state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000450{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000451 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000452
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000453 index = (index - 1) * 2;
454
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000455 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000456 if (empty)
457 /* want empty string */
458 i = j = 0;
459 else {
460 Py_INCREF(Py_None);
461 return Py_None;
462 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000463 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000464 i = STATE_OFFSET(state, state->mark[index]);
465 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000466 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000467
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300468 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000469}
470
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000471static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100472pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000473{
474 switch (status) {
475 case SRE_ERROR_RECURSION_LIMIT:
476 PyErr_SetString(
477 PyExc_RuntimeError,
478 "maximum recursion limit exceeded"
479 );
480 break;
481 case SRE_ERROR_MEMORY:
482 PyErr_NoMemory();
483 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000484 case SRE_ERROR_INTERRUPTED:
485 /* An exception has already been raised, so let it fly */
486 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000487 default:
488 /* other error codes indicate compiler/engine bugs */
489 PyErr_SetString(
490 PyExc_RuntimeError,
491 "internal error in regular expression engine"
492 );
493 }
494}
495
Guido van Rossumb700df92000-03-31 14:59:30 +0000496static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000497pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000498{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000499 if (self->weakreflist != NULL)
500 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000501 Py_XDECREF(self->pattern);
502 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000503 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000504 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000505}
506
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300507LOCAL(Py_ssize_t)
508sre_match(SRE_STATE* state, SRE_CODE* pattern)
509{
510 if (state->charsize == 1)
511 return sre_ucs1_match(state, pattern);
512 if (state->charsize == 2)
513 return sre_ucs2_match(state, pattern);
514 assert(state->charsize == 4);
515 return sre_ucs4_match(state, pattern);
516}
517
518LOCAL(Py_ssize_t)
519sre_search(SRE_STATE* state, SRE_CODE* pattern)
520{
521 if (state->charsize == 1)
522 return sre_ucs1_search(state, pattern);
523 if (state->charsize == 2)
524 return sre_ucs2_search(state, pattern);
525 assert(state->charsize == 4);
526 return sre_ucs4_search(state, pattern);
527}
528
Larry Hastingsdf7c22b2014-01-07 14:25:26 -0800529/*[clinic input]
Larry Hastings16c51912014-01-07 11:53:01 -0800530module _sre
Larry Hastingsc2047262014-01-25 20:43:29 -0800531class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
Larry Hastings16c51912014-01-07 11:53:01 -0800532
533_sre.SRE_Pattern.match as pattern_match
534
Larry Hastings16c51912014-01-07 11:53:01 -0800535 pattern: object
536 pos: Py_ssize_t = 0
537 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
538
539Matches zero or more characters at the beginning of the string.
Larry Hastingsdf7c22b2014-01-07 14:25:26 -0800540[clinic start generated code]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800541
542PyDoc_STRVAR(pattern_match__doc__,
Larry Hastings2623c8c2014-02-08 22:15:29 -0800543"match($self, /, pattern, pos=0, endpos=sys.maxsize)\n"
544"--\n"
545"\n"
Larry Hastings16c51912014-01-07 11:53:01 -0800546"Matches zero or more characters at the beginning of the string.");
547
548#define PATTERN_MATCH_METHODDEF \
549 {"match", (PyCFunction)pattern_match, METH_VARARGS|METH_KEYWORDS, pattern_match__doc__},
550
551static PyObject *
552pattern_match_impl(PatternObject *self, PyObject *pattern, Py_ssize_t pos, Py_ssize_t endpos);
553
554static PyObject *
Larry Hastings5c661892014-01-24 06:17:25 -0800555pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
Larry Hastings16c51912014-01-07 11:53:01 -0800556{
557 PyObject *return_value = NULL;
558 static char *_keywords[] = {"pattern", "pos", "endpos", NULL};
559 PyObject *pattern;
560 Py_ssize_t pos = 0;
561 Py_ssize_t endpos = PY_SSIZE_T_MAX;
562
563 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
564 "O|nn:match", _keywords,
565 &pattern, &pos, &endpos))
566 goto exit;
Larry Hastings5c661892014-01-24 06:17:25 -0800567 return_value = pattern_match_impl(self, pattern, pos, endpos);
Larry Hastings16c51912014-01-07 11:53:01 -0800568
569exit:
570 return return_value;
571}
572
573static PyObject *
574pattern_match_impl(PatternObject *self, PyObject *pattern, Py_ssize_t pos, Py_ssize_t endpos)
Larry Hastings2623c8c2014-02-08 22:15:29 -0800575/*[clinic end generated code: output=1528eafdb8b025ad input=26f9fd31befe46b9]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000576{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000577 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100578 Py_ssize_t status;
Larry Hastings16c51912014-01-07 11:53:01 -0800579 PyObject *string;
Guido van Rossumb700df92000-03-31 14:59:30 +0000580
Larry Hastings16c51912014-01-07 11:53:01 -0800581 string = state_init(&state, (PatternObject *)self, pattern, pos, endpos);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000582 if (!string)
583 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000584
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000585 state.ptr = state.start;
586
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000587 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
588
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300589 status = sre_match(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000590
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000591 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Thomas Wouters89f507f2006-12-13 04:49:30 +0000592 if (PyErr_Occurred())
593 return NULL;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000594
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000595 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000596
Larry Hastings16c51912014-01-07 11:53:01 -0800597 return (PyObject *)pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000598}
599
600static PyObject*
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200601pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
602{
603 SRE_STATE state;
604 Py_ssize_t status;
605
606 PyObject* string;
607 Py_ssize_t start = 0;
608 Py_ssize_t end = PY_SSIZE_T_MAX;
609 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
610 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:fullmatch", kwlist,
611 &string, &start, &end))
612 return NULL;
613
614 string = state_init(&state, self, string, start, end);
615 if (!string)
616 return NULL;
617
618 state.match_all = 1;
619 state.ptr = state.start;
620
621 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
622
623 status = sre_match(&state, PatternObject_GetCode(self));
624
625 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
626 if (PyErr_Occurred())
627 return NULL;
628
629 state_fini(&state);
630
631 return pattern_new_match(self, &state, status);
632}
633
634static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000635pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000636{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000637 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100638 Py_ssize_t status;
Guido van Rossumb700df92000-03-31 14:59:30 +0000639
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000640 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000641 Py_ssize_t start = 0;
642 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000643 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000644 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000645 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000646 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000647
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000648 string = state_init(&state, self, string, start, end);
649 if (!string)
650 return NULL;
651
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000652 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
653
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300654 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000655
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000656 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
657
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000658 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000659
Thomas Wouters89f507f2006-12-13 04:49:30 +0000660 if (PyErr_Occurred())
661 return NULL;
662
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000663 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000664}
665
666static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000667call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000668{
669 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000670 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000671 PyObject* func;
672 PyObject* result;
673
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000674 if (!args)
675 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000676 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000677 if (!name)
678 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000679 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000680 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000681 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000682 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000683 func = PyObject_GetAttrString(mod, function);
684 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000685 if (!func)
686 return NULL;
687 result = PyObject_CallObject(func, args);
688 Py_DECREF(func);
689 Py_DECREF(args);
690 return result;
691}
692
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
 *
 * Returns 1 on success, after dropping the reference previously held in
 * *object; returns 0 and leaves *object untouched when the call fails.
 */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* packed = PyTuple_Pack(2, *object, memo);
    PyObject* duplicate = call("copy", "deepcopy", packed);

    if (duplicate == NULL)
        return 0;

    Py_DECREF(*object);
    *object = duplicate;

    return 1; /* success */
}
#endif
712
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000713static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000714pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000715{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000716 SRE_STATE state;
717 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100718 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000719 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000720
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000721 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000722 Py_ssize_t start = 0;
723 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000724 static char* kwlist[] = { "source", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000725 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000726 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000728
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000729 string = state_init(&state, self, string, start, end);
730 if (!string)
731 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000732
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000733 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000734 if (!list) {
735 state_fini(&state);
736 return NULL;
737 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000738
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000740
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000741 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000742
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000743 state_reset(&state);
744
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 state.ptr = state.start;
746
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300747 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300748 if (PyErr_Occurred())
749 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000750
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000751 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000752 if (status == 0)
753 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000754 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000755 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000756 }
Tim Peters3d563502006-01-21 02:47:53 +0000757
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000758 /* don't bother to build a match object */
759 switch (self->groups) {
760 case 0:
761 b = STATE_OFFSET(&state, state.start);
762 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300763 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300764 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000765 if (!item)
766 goto error;
767 break;
768 case 1:
769 item = state_getslice(&state, 1, string, 1);
770 if (!item)
771 goto error;
772 break;
773 default:
774 item = PyTuple_New(self->groups);
775 if (!item)
776 goto error;
777 for (i = 0; i < self->groups; i++) {
778 PyObject* o = state_getslice(&state, i+1, string, 1);
779 if (!o) {
780 Py_DECREF(item);
781 goto error;
782 }
783 PyTuple_SET_ITEM(item, i, o);
784 }
785 break;
786 }
787
788 status = PyList_Append(list, item);
789 Py_DECREF(item);
790 if (status < 0)
791 goto error;
792
793 if (state.ptr == state.start)
794 state.start = (void*) ((char*) state.ptr + state.charsize);
795 else
796 state.start = state.ptr;
797
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000798 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000799
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000800 state_fini(&state);
801 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000802
803error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000804 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000805 state_fini(&state);
806 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000807
Guido van Rossumb700df92000-03-31 14:59:30 +0000808}
809
Fredrik Lundh703ce812001-10-24 22:16:30 +0000810static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600811pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
Fredrik Lundh703ce812001-10-24 22:16:30 +0000812{
813 PyObject* scanner;
814 PyObject* search;
815 PyObject* iterator;
816
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600817 scanner = pattern_scanner(pattern, args, kw);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000818 if (!scanner)
819 return NULL;
820
821 search = PyObject_GetAttrString(scanner, "search");
822 Py_DECREF(scanner);
823 if (!search)
824 return NULL;
825
826 iterator = PyCallIter_New(search, Py_None);
827 Py_DECREF(search);
828
829 return iterator;
830}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000831
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000832static PyObject*
833pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
834{
835 SRE_STATE state;
836 PyObject* list;
837 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100838 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000839 Py_ssize_t n;
840 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000841 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000842
843 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000844 Py_ssize_t maxsplit = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000845 static char* kwlist[] = { "source", "maxsplit", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000846 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000847 &string, &maxsplit))
848 return NULL;
849
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000850 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000851 if (!string)
852 return NULL;
853
854 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000855 if (!list) {
856 state_fini(&state);
857 return NULL;
858 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000859
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000860 n = 0;
861 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000862
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000863 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000864
865 state_reset(&state);
866
867 state.ptr = state.start;
868
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300869 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300870 if (PyErr_Occurred())
871 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000872
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000873 if (status <= 0) {
874 if (status == 0)
875 break;
876 pattern_error(status);
877 goto error;
878 }
Tim Peters3d563502006-01-21 02:47:53 +0000879
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000880 if (state.start == state.ptr) {
881 if (last == state.end)
882 break;
883 /* skip one character */
884 state.start = (void*) ((char*) state.ptr + state.charsize);
885 continue;
886 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000887
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000888 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300889 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000890 string, STATE_OFFSET(&state, last),
891 STATE_OFFSET(&state, state.start)
892 );
893 if (!item)
894 goto error;
895 status = PyList_Append(list, item);
896 Py_DECREF(item);
897 if (status < 0)
898 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000899
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000900 /* add groups (if any) */
901 for (i = 0; i < self->groups; i++) {
902 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000903 if (!item)
904 goto error;
905 status = PyList_Append(list, item);
906 Py_DECREF(item);
907 if (status < 0)
908 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000909 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000910
911 n = n + 1;
912
913 last = state.start = state.ptr;
914
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000915 }
916
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000917 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300918 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000919 string, STATE_OFFSET(&state, last), state.endpos
920 );
921 if (!item)
922 goto error;
923 status = PyList_Append(list, item);
924 Py_DECREF(item);
925 if (status < 0)
926 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000927
928 state_fini(&state);
929 return list;
930
931error:
932 Py_DECREF(list);
933 state_fini(&state);
934 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000935
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000936}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000937
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000938static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000939pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000940 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000941{
942 SRE_STATE state;
943 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300944 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000945 PyObject* item;
946 PyObject* filter;
947 PyObject* args;
948 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000949 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100950 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000951 Py_ssize_t n;
952 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300953 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000954 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600955 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000956
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000957 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000958 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000959 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000960 Py_INCREF(filter);
961 filter_is_callable = 1;
962 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000963 /* if not callable, check if it's a literal string */
964 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600965 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300966 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200967 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000968 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300969 if (charsize == 1)
970 literal = memchr(ptr, '\\', n) == NULL;
971 else
972 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000973 } else {
974 PyErr_Clear();
975 literal = 0;
976 }
Benjamin Petersone48944b2012-03-07 14:50:25 -0600977 if (view.buf)
978 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000979 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000980 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000981 Py_INCREF(filter);
982 filter_is_callable = 0;
983 } else {
984 /* not a literal; hand it over to the template compiler */
985 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +0000986 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000987 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000988 );
989 if (!filter)
990 return NULL;
991 filter_is_callable = PyCallable_Check(filter);
992 }
Fredrik Lundhdac58492001-10-21 21:48:30 +0000993 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000994
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000995 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +0000996 if (!string) {
997 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000998 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +0000999 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001000
1001 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001002 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001003 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001004 state_fini(&state);
1005 return NULL;
1006 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001007
1008 n = i = 0;
1009
1010 while (!count || n < count) {
1011
1012 state_reset(&state);
1013
1014 state.ptr = state.start;
1015
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001016 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001017 if (PyErr_Occurred())
1018 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001019
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001020 if (status <= 0) {
1021 if (status == 0)
1022 break;
1023 pattern_error(status);
1024 goto error;
1025 }
Tim Peters3d563502006-01-21 02:47:53 +00001026
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001027 b = STATE_OFFSET(&state, state.start);
1028 e = STATE_OFFSET(&state, state.ptr);
1029
1030 if (i < b) {
1031 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001032 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001033 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001034 if (!item)
1035 goto error;
1036 status = PyList_Append(list, item);
1037 Py_DECREF(item);
1038 if (status < 0)
1039 goto error;
1040
1041 } else if (i == b && i == e && n > 0)
1042 /* ignore empty match on latest position */
1043 goto next;
1044
1045 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001046 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001047 match = pattern_new_match(self, &state, 1);
1048 if (!match)
1049 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001050 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001051 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001052 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001053 goto error;
1054 }
1055 item = PyObject_CallObject(filter, args);
1056 Py_DECREF(args);
1057 Py_DECREF(match);
1058 if (!item)
1059 goto error;
1060 } else {
1061 /* filter is literal string */
1062 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001063 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001064 }
1065
1066 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001067 if (item != Py_None) {
1068 status = PyList_Append(list, item);
1069 Py_DECREF(item);
1070 if (status < 0)
1071 goto error;
1072 }
Tim Peters3d563502006-01-21 02:47:53 +00001073
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001074 i = e;
1075 n = n + 1;
1076
1077next:
1078 /* move on */
1079 if (state.ptr == state.start)
1080 state.start = (void*) ((char*) state.ptr + state.charsize);
1081 else
1082 state.start = state.ptr;
1083
1084 }
1085
1086 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001087 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001088 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001089 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001090 if (!item)
1091 goto error;
1092 status = PyList_Append(list, item);
1093 Py_DECREF(item);
1094 if (status < 0)
1095 goto error;
1096 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001097
1098 state_fini(&state);
1099
Guido van Rossum4e173842001-12-07 04:25:10 +00001100 Py_DECREF(filter);
1101
Fredrik Lundhdac58492001-10-21 21:48:30 +00001102 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001103 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001104 if (!joiner) {
1105 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001106 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001107 }
1108 if (PyList_GET_SIZE(list) == 0) {
1109 Py_DECREF(list);
1110 item = joiner;
1111 }
1112 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001113 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001114 item = _PyBytes_Join(joiner, list);
1115 else
1116 item = PyUnicode_Join(joiner, list);
1117 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001118 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001119 if (!item)
1120 return NULL;
1121 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001122
1123 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001124 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001125
1126 return item;
1127
1128error:
1129 Py_DECREF(list);
1130 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001131 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001132 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001133
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001134}
1135
1136static PyObject*
1137pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
1138{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001139 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001140 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001141 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001142 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001143 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001144 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001145 return NULL;
1146
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001147 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001148}
1149
1150static PyObject*
1151pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
1152{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001153 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001154 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001155 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001156 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001157 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001158 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001159 return NULL;
1160
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001161 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001162}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001163
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001164static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001165pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001166{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001167#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001168 PatternObject* copy;
1169 int offset;
1170
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001171 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1172 if (!copy)
1173 return NULL;
1174
1175 offset = offsetof(PatternObject, groups);
1176
1177 Py_XINCREF(self->groupindex);
1178 Py_XINCREF(self->indexgroup);
1179 Py_XINCREF(self->pattern);
1180
1181 memcpy((char*) copy + offset, (char*) self + offset,
1182 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001183 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001184
1185 return (PyObject*) copy;
1186#else
1187 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1188 return NULL;
1189#endif
1190}
1191
1192static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001193pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001194{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001195#ifdef USE_BUILTIN_COPY
1196 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001197
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001198 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001199 if (!copy)
1200 return NULL;
1201
1202 if (!deepcopy(&copy->groupindex, memo) ||
1203 !deepcopy(&copy->indexgroup, memo) ||
1204 !deepcopy(&copy->pattern, memo)) {
1205 Py_DECREF(copy);
1206 return NULL;
1207 }
1208
1209#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001210 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1211 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001212#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001213}
1214
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001215static PyObject *
1216pattern_repr(PatternObject *obj)
1217{
1218 static const struct {
1219 const char *name;
1220 int value;
1221 } flag_names[] = {
1222 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1223 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1224 {"re.LOCALE", SRE_FLAG_LOCALE},
1225 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1226 {"re.DOTALL", SRE_FLAG_DOTALL},
1227 {"re.UNICODE", SRE_FLAG_UNICODE},
1228 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1229 {"re.DEBUG", SRE_FLAG_DEBUG},
1230 {"re.ASCII", SRE_FLAG_ASCII},
1231 };
1232 PyObject *result = NULL;
1233 PyObject *flag_items;
1234 int i;
1235 int flags = obj->flags;
1236
1237 /* Omit re.UNICODE for valid string patterns. */
1238 if (obj->isbytes == 0 &&
1239 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1240 SRE_FLAG_UNICODE)
1241 flags &= ~SRE_FLAG_UNICODE;
1242
1243 flag_items = PyList_New(0);
1244 if (!flag_items)
1245 return NULL;
1246
1247 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1248 if (flags & flag_names[i].value) {
1249 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1250 if (!item)
1251 goto done;
1252
1253 if (PyList_Append(flag_items, item) < 0) {
1254 Py_DECREF(item);
1255 goto done;
1256 }
1257 Py_DECREF(item);
1258 flags &= ~flag_names[i].value;
1259 }
1260 }
1261 if (flags) {
1262 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1263 if (!item)
1264 goto done;
1265
1266 if (PyList_Append(flag_items, item) < 0) {
1267 Py_DECREF(item);
1268 goto done;
1269 }
1270 Py_DECREF(item);
1271 }
1272
1273 if (PyList_Size(flag_items) > 0) {
1274 PyObject *flags_result;
1275 PyObject *sep = PyUnicode_FromString("|");
1276 if (!sep)
1277 goto done;
1278 flags_result = PyUnicode_Join(sep, flag_items);
1279 Py_DECREF(sep);
1280 if (!flags_result)
1281 goto done;
1282 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1283 obj->pattern, flags_result);
1284 Py_DECREF(flags_result);
1285 }
1286 else {
1287 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1288 }
1289
1290done:
1291 Py_DECREF(flag_items);
1292 return result;
1293}
1294
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001295PyDoc_STRVAR(pattern_fullmatch_doc,
1296"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
1297 Matches against all of the string");
1298
Raymond Hettinger94478742004-09-24 04:31:19 +00001299PyDoc_STRVAR(pattern_search_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001300"search(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001301 Scan through string looking for a match, and return a corresponding\n\
Andrew Svetlov0b64c142012-12-25 18:48:54 +02001302 match object instance. Return None if no position in the string matches.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001303
1304PyDoc_STRVAR(pattern_split_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001305"split(string[, maxsplit = 0]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001306 Split string by the occurrences of pattern.");
1307
1308PyDoc_STRVAR(pattern_findall_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001309"findall(string[, pos[, endpos]]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001310 Return a list of all non-overlapping matches of pattern in string.");
1311
1312PyDoc_STRVAR(pattern_finditer_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001313"finditer(string[, pos[, endpos]]) -> iterator.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001314 Return an iterator over all non-overlapping matches for the \n\
1315 RE pattern in string. For each match, the iterator returns a\n\
1316 match object.");
1317
1318PyDoc_STRVAR(pattern_sub_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001319"sub(repl, string[, count = 0]) -> newstring.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001320 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00001321 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001322
1323PyDoc_STRVAR(pattern_subn_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001324"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001325 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
1326 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00001327 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001328
1329PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1330
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001331static PyMethodDef pattern_methods[] = {
Larry Hastings16c51912014-01-07 11:53:01 -08001332 PATTERN_MATCH_METHODDEF
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001333 {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
1334 pattern_fullmatch_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001335 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001336 pattern_search_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001337 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001338 pattern_sub_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001339 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001340 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001341 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001342 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001343 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001344 pattern_findall_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001345 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001346 pattern_finditer_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001347 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001348 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
1349 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001350 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001351};
1352
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00001353#define PAT_OFF(x) offsetof(PatternObject, x)
1354static PyMemberDef pattern_members[] = {
1355 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
1356 {"flags", T_INT, PAT_OFF(flags), READONLY},
1357 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
1358 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
1359 {NULL} /* Sentinel */
1360};
Guido van Rossumb700df92000-03-31 14:59:30 +00001361
Neal Norwitz57c179c2006-03-22 07:18:02 +00001362static PyTypeObject Pattern_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001363 PyVarObject_HEAD_INIT(NULL, 0)
1364 "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001365 sizeof(PatternObject), sizeof(SRE_CODE),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001366 (destructor)pattern_dealloc, /* tp_dealloc */
1367 0, /* tp_print */
1368 0, /* tp_getattr */
1369 0, /* tp_setattr */
1370 0, /* tp_reserved */
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001371 (reprfunc)pattern_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001372 0, /* tp_as_number */
1373 0, /* tp_as_sequence */
1374 0, /* tp_as_mapping */
1375 0, /* tp_hash */
1376 0, /* tp_call */
1377 0, /* tp_str */
1378 0, /* tp_getattro */
1379 0, /* tp_setattro */
1380 0, /* tp_as_buffer */
1381 Py_TPFLAGS_DEFAULT, /* tp_flags */
1382 pattern_doc, /* tp_doc */
1383 0, /* tp_traverse */
1384 0, /* tp_clear */
1385 0, /* tp_richcompare */
1386 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
1387 0, /* tp_iter */
1388 0, /* tp_iternext */
1389 pattern_methods, /* tp_methods */
1390 pattern_members, /* tp_members */
Guido van Rossumb700df92000-03-31 14:59:30 +00001391};
1392
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001393static int _validate(PatternObject *self); /* Forward */
1394
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001395static PyObject *
1396_compile(PyObject* self_, PyObject* args)
1397{
1398 /* "compile" pattern descriptor to pattern object */
1399
1400 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001401 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001402
1403 PyObject* pattern;
1404 int flags = 0;
1405 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001406 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001407 PyObject* groupindex = NULL;
1408 PyObject* indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001409
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001410 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001411 &PyList_Type, &code, &groups,
1412 &groupindex, &indexgroup))
1413 return NULL;
1414
1415 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001416 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001417 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1418 if (!self)
1419 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001420 self->weakreflist = NULL;
1421 self->pattern = NULL;
1422 self->groupindex = NULL;
1423 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001424
1425 self->codesize = n;
1426
1427 for (i = 0; i < n; i++) {
1428 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001429 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001430 self->code[i] = (SRE_CODE) value;
1431 if ((unsigned long) self->code[i] != value) {
1432 PyErr_SetString(PyExc_OverflowError,
1433 "regular expression code size limit exceeded");
1434 break;
1435 }
1436 }
1437
1438 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001439 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001440 return NULL;
1441 }
1442
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001443 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001444 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001445 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001446 else {
1447 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001448 int charsize;
1449 Py_buffer view;
1450 view.buf = NULL;
1451 if (!getstring(pattern, &p_length, &self->isbytes,
1452 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001453 Py_DECREF(self);
1454 return NULL;
1455 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001456 if (view.buf)
1457 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001458 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001459
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001460 Py_INCREF(pattern);
1461 self->pattern = pattern;
1462
1463 self->flags = flags;
1464
1465 self->groups = groups;
1466
1467 Py_XINCREF(groupindex);
1468 self->groupindex = groupindex;
1469
1470 Py_XINCREF(indexgroup);
1471 self->indexgroup = indexgroup;
1472
1473 self->weakreflist = NULL;
1474
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001475 if (!_validate(self)) {
1476 Py_DECREF(self);
1477 return NULL;
1478 }
1479
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001480 return (PyObject*) self;
1481}
1482
Guido van Rossumb700df92000-03-31 14:59:30 +00001483/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001484/* Code validation */
1485
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1507
/* Validator tracing is off by default; define VVERBOSE to turn it on */
#undef VVERBOSE

/* VTRACE takes a parenthesized printf argument list and expands to a
   printf call when tracing is enabled, or to an empty statement otherwise */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Leave the enclosing validator function with a "not valid" result (0) */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* The macros below read the next word of the code array.  They expect
   the enclosing function to have `code` and `end` pointers plus the
   variable they assign (`op`, `arg` or `skip`), and they FAIL when no
   word is left before `end`. */

/* Read the next word into `op` and step over it */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)

/* Read the next word into `arg` and step over it */
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)

/* Read the next word into `skip`; FAIL if skip-adj is larger than the
   number of words from the skip word itself up to `end`, otherwise step
   over the skip word */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)

/* The common case: no adjustment */
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001548
1549static int
1550_validate_charset(SRE_CODE *code, SRE_CODE *end)
1551{
1552 /* Some variables are manipulated by the macros above */
1553 SRE_CODE op;
1554 SRE_CODE arg;
1555 SRE_CODE offset;
1556 int i;
1557
1558 while (code < end) {
1559 GET_OP;
1560 switch (op) {
1561
1562 case SRE_OP_NEGATE:
1563 break;
1564
1565 case SRE_OP_LITERAL:
1566 GET_ARG;
1567 break;
1568
1569 case SRE_OP_RANGE:
1570 GET_ARG;
1571 GET_ARG;
1572 break;
1573
1574 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001575 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001576 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001577 FAIL;
1578 code += offset;
1579 break;
1580
1581 case SRE_OP_BIGCHARSET:
1582 GET_ARG; /* Number of blocks */
1583 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001584 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001585 FAIL;
1586 /* Make sure that each byte points to a valid block */
1587 for (i = 0; i < 256; i++) {
1588 if (((unsigned char *)code)[i] >= arg)
1589 FAIL;
1590 }
1591 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001592 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001593 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001594 FAIL;
1595 code += offset;
1596 break;
1597
1598 case SRE_OP_CATEGORY:
1599 GET_ARG;
1600 switch (arg) {
1601 case SRE_CATEGORY_DIGIT:
1602 case SRE_CATEGORY_NOT_DIGIT:
1603 case SRE_CATEGORY_SPACE:
1604 case SRE_CATEGORY_NOT_SPACE:
1605 case SRE_CATEGORY_WORD:
1606 case SRE_CATEGORY_NOT_WORD:
1607 case SRE_CATEGORY_LINEBREAK:
1608 case SRE_CATEGORY_NOT_LINEBREAK:
1609 case SRE_CATEGORY_LOC_WORD:
1610 case SRE_CATEGORY_LOC_NOT_WORD:
1611 case SRE_CATEGORY_UNI_DIGIT:
1612 case SRE_CATEGORY_UNI_NOT_DIGIT:
1613 case SRE_CATEGORY_UNI_SPACE:
1614 case SRE_CATEGORY_UNI_NOT_SPACE:
1615 case SRE_CATEGORY_UNI_WORD:
1616 case SRE_CATEGORY_UNI_NOT_WORD:
1617 case SRE_CATEGORY_UNI_LINEBREAK:
1618 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1619 break;
1620 default:
1621 FAIL;
1622 }
1623 break;
1624
1625 default:
1626 FAIL;
1627
1628 }
1629 }
1630
1631 return 1;
1632}
1633
1634static int
1635_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1636{
1637 /* Some variables are manipulated by the macros above */
1638 SRE_CODE op;
1639 SRE_CODE arg;
1640 SRE_CODE skip;
1641
1642 VTRACE(("code=%p, end=%p\n", code, end));
1643
1644 if (code > end)
1645 FAIL;
1646
1647 while (code < end) {
1648 GET_OP;
1649 switch (op) {
1650
1651 case SRE_OP_MARK:
1652 /* We don't check whether marks are properly nested; the
1653 sre_match() code is robust even if they don't, and the worst
1654 you can get is nonsensical match results. */
1655 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001656 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001657 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1658 FAIL;
1659 }
1660 break;
1661
1662 case SRE_OP_LITERAL:
1663 case SRE_OP_NOT_LITERAL:
1664 case SRE_OP_LITERAL_IGNORE:
1665 case SRE_OP_NOT_LITERAL_IGNORE:
1666 GET_ARG;
1667 /* The arg is just a character, nothing to check */
1668 break;
1669
1670 case SRE_OP_SUCCESS:
1671 case SRE_OP_FAILURE:
1672 /* Nothing to check; these normally end the matching process */
1673 break;
1674
1675 case SRE_OP_AT:
1676 GET_ARG;
1677 switch (arg) {
1678 case SRE_AT_BEGINNING:
1679 case SRE_AT_BEGINNING_STRING:
1680 case SRE_AT_BEGINNING_LINE:
1681 case SRE_AT_END:
1682 case SRE_AT_END_LINE:
1683 case SRE_AT_END_STRING:
1684 case SRE_AT_BOUNDARY:
1685 case SRE_AT_NON_BOUNDARY:
1686 case SRE_AT_LOC_BOUNDARY:
1687 case SRE_AT_LOC_NON_BOUNDARY:
1688 case SRE_AT_UNI_BOUNDARY:
1689 case SRE_AT_UNI_NON_BOUNDARY:
1690 break;
1691 default:
1692 FAIL;
1693 }
1694 break;
1695
1696 case SRE_OP_ANY:
1697 case SRE_OP_ANY_ALL:
1698 /* These have no operands */
1699 break;
1700
1701 case SRE_OP_IN:
1702 case SRE_OP_IN_IGNORE:
1703 GET_SKIP;
1704 /* Stop 1 before the end; we check the FAILURE below */
1705 if (!_validate_charset(code, code+skip-2))
1706 FAIL;
1707 if (code[skip-2] != SRE_OP_FAILURE)
1708 FAIL;
1709 code += skip-1;
1710 break;
1711
1712 case SRE_OP_INFO:
1713 {
1714 /* A minimal info field is
1715 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1716 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1717 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001718 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001719 SRE_CODE *newcode;
1720 GET_SKIP;
1721 newcode = code+skip-1;
1722 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001723 GET_ARG;
1724 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001725 /* Check that only valid flags are present */
1726 if ((flags & ~(SRE_INFO_PREFIX |
1727 SRE_INFO_LITERAL |
1728 SRE_INFO_CHARSET)) != 0)
1729 FAIL;
1730 /* PREFIX and CHARSET are mutually exclusive */
1731 if ((flags & SRE_INFO_PREFIX) &&
1732 (flags & SRE_INFO_CHARSET))
1733 FAIL;
1734 /* LITERAL implies PREFIX */
1735 if ((flags & SRE_INFO_LITERAL) &&
1736 !(flags & SRE_INFO_PREFIX))
1737 FAIL;
1738 /* Validate the prefix */
1739 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001740 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001741 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001742 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001743 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001744 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001745 FAIL;
1746 code += prefix_len;
1747 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001748 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001749 FAIL;
1750 /* Each overlap value should be < prefix_len */
1751 for (i = 0; i < prefix_len; i++) {
1752 if (code[i] >= prefix_len)
1753 FAIL;
1754 }
1755 code += prefix_len;
1756 }
1757 /* Validate the charset */
1758 if (flags & SRE_INFO_CHARSET) {
1759 if (!_validate_charset(code, newcode-1))
1760 FAIL;
1761 if (newcode[-1] != SRE_OP_FAILURE)
1762 FAIL;
1763 code = newcode;
1764 }
1765 else if (code != newcode) {
1766 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1767 FAIL;
1768 }
1769 }
1770 break;
1771
1772 case SRE_OP_BRANCH:
1773 {
1774 SRE_CODE *target = NULL;
1775 for (;;) {
1776 GET_SKIP;
1777 if (skip == 0)
1778 break;
1779 /* Stop 2 before the end; we check the JUMP below */
1780 if (!_validate_inner(code, code+skip-3, groups))
1781 FAIL;
1782 code += skip-3;
1783 /* Check that it ends with a JUMP, and that each JUMP
1784 has the same target */
1785 GET_OP;
1786 if (op != SRE_OP_JUMP)
1787 FAIL;
1788 GET_SKIP;
1789 if (target == NULL)
1790 target = code+skip-1;
1791 else if (code+skip-1 != target)
1792 FAIL;
1793 }
1794 }
1795 break;
1796
1797 case SRE_OP_REPEAT_ONE:
1798 case SRE_OP_MIN_REPEAT_ONE:
1799 {
1800 SRE_CODE min, max;
1801 GET_SKIP;
1802 GET_ARG; min = arg;
1803 GET_ARG; max = arg;
1804 if (min > max)
1805 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001806 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001807 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001808 if (!_validate_inner(code, code+skip-4, groups))
1809 FAIL;
1810 code += skip-4;
1811 GET_OP;
1812 if (op != SRE_OP_SUCCESS)
1813 FAIL;
1814 }
1815 break;
1816
1817 case SRE_OP_REPEAT:
1818 {
1819 SRE_CODE min, max;
1820 GET_SKIP;
1821 GET_ARG; min = arg;
1822 GET_ARG; max = arg;
1823 if (min > max)
1824 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001825 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001826 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001827 if (!_validate_inner(code, code+skip-3, groups))
1828 FAIL;
1829 code += skip-3;
1830 GET_OP;
1831 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1832 FAIL;
1833 }
1834 break;
1835
1836 case SRE_OP_GROUPREF:
1837 case SRE_OP_GROUPREF_IGNORE:
1838 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001839 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001840 FAIL;
1841 break;
1842
1843 case SRE_OP_GROUPREF_EXISTS:
1844 /* The regex syntax for this is: '(?(group)then|else)', where
1845 'group' is either an integer group number or a group name,
1846 'then' and 'else' are sub-regexes, and 'else' is optional. */
1847 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001848 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001849 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001850 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001851 code--; /* The skip is relative to the first arg! */
1852 /* There are two possibilities here: if there is both a 'then'
1853 part and an 'else' part, the generated code looks like:
1854
1855 GROUPREF_EXISTS
1856 <group>
1857 <skipyes>
1858 ...then part...
1859 JUMP
1860 <skipno>
1861 (<skipyes> jumps here)
1862 ...else part...
1863 (<skipno> jumps here)
1864
1865 If there is only a 'then' part, it looks like:
1866
1867 GROUPREF_EXISTS
1868 <group>
1869 <skip>
1870 ...then part...
1871 (<skip> jumps here)
1872
1873 There is no direct way to decide which it is, and we don't want
1874 to allow arbitrary jumps anywhere in the code; so we just look
1875 for a JUMP opcode preceding our skip target.
1876 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001877 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001878 code[skip-3] == SRE_OP_JUMP)
1879 {
1880 VTRACE(("both then and else parts present\n"));
1881 if (!_validate_inner(code+1, code+skip-3, groups))
1882 FAIL;
1883 code += skip-2; /* Position after JUMP, at <skipno> */
1884 GET_SKIP;
1885 if (!_validate_inner(code, code+skip-1, groups))
1886 FAIL;
1887 code += skip-1;
1888 }
1889 else {
1890 VTRACE(("only a then part present\n"));
1891 if (!_validate_inner(code+1, code+skip-1, groups))
1892 FAIL;
1893 code += skip-1;
1894 }
1895 break;
1896
1897 case SRE_OP_ASSERT:
1898 case SRE_OP_ASSERT_NOT:
1899 GET_SKIP;
1900 GET_ARG; /* 0 for lookahead, width for lookbehind */
1901 code--; /* Back up over arg to simplify math below */
1902 if (arg & 0x80000000)
1903 FAIL; /* Width too large */
1904 /* Stop 1 before the end; we check the SUCCESS below */
1905 if (!_validate_inner(code+1, code+skip-2, groups))
1906 FAIL;
1907 code += skip-2;
1908 GET_OP;
1909 if (op != SRE_OP_SUCCESS)
1910 FAIL;
1911 break;
1912
1913 default:
1914 FAIL;
1915
1916 }
1917 }
1918
1919 VTRACE(("okay\n"));
1920 return 1;
1921}
1922
1923static int
1924_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1925{
1926 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
1927 FAIL;
1928 if (groups == 0) /* fix for simplejson */
1929 groups = 100; /* 100 groups should always be safe */
1930 return _validate_inner(code, end-1, groups);
1931}
1932
1933static int
1934_validate(PatternObject *self)
1935{
1936 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1937 {
1938 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1939 return 0;
1940 }
1941 else
1942 VTRACE(("Success!\n"));
1943 return 1;
1944}
1945
1946/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001947/* match methods */
1948
1949static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001950match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001951{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001952 Py_XDECREF(self->regs);
1953 Py_XDECREF(self->string);
1954 Py_DECREF(self->pattern);
1955 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001956}
1957
1958static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001959match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001960{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001961 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001962 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001963 Py_buffer view;
1964 PyObject *result;
1965 void* ptr;
1966
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001967 if (index < 0 || index >= self->groups) {
1968 /* raise IndexError if we were given a bad group number */
1969 PyErr_SetString(
1970 PyExc_IndexError,
1971 "no such group"
1972 );
1973 return NULL;
1974 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001975
Fredrik Lundh6f013982000-07-03 18:44:21 +00001976 index *= 2;
1977
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001978 if (self->string == Py_None || self->mark[index] < 0) {
1979 /* return default value if the string or group is undefined */
1980 Py_INCREF(def);
1981 return def;
1982 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001983
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001984 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001985 if (ptr == NULL)
1986 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001987 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001988 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001989 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001990 PyBuffer_Release(&view);
1991 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001992}
1993
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001994static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001995match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001996{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001997 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001998
Guido van Rossumddefaf32007-01-14 03:31:43 +00001999 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002000 /* Default value */
2001 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002002
Christian Heimes217cfd12007-12-02 14:31:20 +00002003 if (PyLong_Check(index))
2004 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002005
Fredrik Lundh6f013982000-07-03 18:44:21 +00002006 i = -1;
2007
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002008 if (self->pattern->groupindex) {
2009 index = PyObject_GetItem(self->pattern->groupindex, index);
2010 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002011 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002012 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002013 Py_DECREF(index);
2014 } else
2015 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002016 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002017
2018 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002019}
2020
2021static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002022match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002023{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002024 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002025}
2026
2027static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002028match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002029{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002030 /* delegate to Python code */
2031 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002032 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002033 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002034 );
2035}
2036
2037static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002038match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002039{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002040 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002041 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002042
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002043 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002044
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002045 switch (size) {
2046 case 0:
2047 result = match_getslice(self, Py_False, Py_None);
2048 break;
2049 case 1:
2050 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2051 break;
2052 default:
2053 /* fetch multiple items */
2054 result = PyTuple_New(size);
2055 if (!result)
2056 return NULL;
2057 for (i = 0; i < size; i++) {
2058 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002059 self, PyTuple_GET_ITEM(args, i), Py_None
2060 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002061 if (!item) {
2062 Py_DECREF(result);
2063 return NULL;
2064 }
2065 PyTuple_SET_ITEM(result, i, item);
2066 }
2067 break;
2068 }
2069 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002070}
2071
2072static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002073match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002074{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002075 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002076 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002077
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002078 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002079 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00002080 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002081 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002082
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002083 result = PyTuple_New(self->groups-1);
2084 if (!result)
2085 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002086
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002087 for (index = 1; index < self->groups; index++) {
2088 PyObject* item;
2089 item = match_getslice_by_index(self, index, def);
2090 if (!item) {
2091 Py_DECREF(result);
2092 return NULL;
2093 }
2094 PyTuple_SET_ITEM(result, index-1, item);
2095 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002096
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002097 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002098}
2099
2100static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002101match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002102{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002103 PyObject* result;
2104 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002105 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002106
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002107 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002108 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002109 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002110 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002111
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002112 result = PyDict_New();
2113 if (!result || !self->pattern->groupindex)
2114 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002115
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002116 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002117 if (!keys)
2118 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002119
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002120 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002121 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002122 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002123 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002124 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002125 if (!key)
2126 goto failed;
2127 value = match_getslice(self, key, def);
2128 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002129 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002130 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002131 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002132 status = PyDict_SetItem(result, key, value);
2133 Py_DECREF(value);
2134 if (status < 0)
2135 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002136 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002137
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002138 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002139
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002140 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002141
2142failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002143 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002144 Py_DECREF(result);
2145 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002146}
2147
2148static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002149match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002150{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002151 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002152
Guido van Rossumddefaf32007-01-14 03:31:43 +00002153 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002154 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002155 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002156
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002157 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002159 if (index < 0 || index >= self->groups) {
2160 PyErr_SetString(
2161 PyExc_IndexError,
2162 "no such group"
2163 );
2164 return NULL;
2165 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002166
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002167 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002168 return PyLong_FromSsize_t(self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002169}
2170
2171static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002172match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002173{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002174 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002175
Guido van Rossumddefaf32007-01-14 03:31:43 +00002176 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002177 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002178 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002179
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002180 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002182 if (index < 0 || index >= self->groups) {
2183 PyErr_SetString(
2184 PyExc_IndexError,
2185 "no such group"
2186 );
2187 return NULL;
2188 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002189
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002190 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002191 return PyLong_FromSsize_t(self->mark[index*2+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002192}
2193
2194LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002195_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002196{
2197 PyObject* pair;
2198 PyObject* item;
2199
2200 pair = PyTuple_New(2);
2201 if (!pair)
2202 return NULL;
2203
Christian Heimes217cfd12007-12-02 14:31:20 +00002204 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002205 if (!item)
2206 goto error;
2207 PyTuple_SET_ITEM(pair, 0, item);
2208
Christian Heimes217cfd12007-12-02 14:31:20 +00002209 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002210 if (!item)
2211 goto error;
2212 PyTuple_SET_ITEM(pair, 1, item);
2213
2214 return pair;
2215
2216 error:
2217 Py_DECREF(pair);
2218 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002219}
2220
2221static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002222match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002223{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002224 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002225
Guido van Rossumddefaf32007-01-14 03:31:43 +00002226 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002227 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002228 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002229
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002230 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002231
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002232 if (index < 0 || index >= self->groups) {
2233 PyErr_SetString(
2234 PyExc_IndexError,
2235 "no such group"
2236 );
2237 return NULL;
2238 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002239
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002240 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002241 return _pair(self->mark[index*2], self->mark[index*2+1]);
2242}
2243
2244static PyObject*
2245match_regs(MatchObject* self)
2246{
2247 PyObject* regs;
2248 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002249 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002250
2251 regs = PyTuple_New(self->groups);
2252 if (!regs)
2253 return NULL;
2254
2255 for (index = 0; index < self->groups; index++) {
2256 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2257 if (!item) {
2258 Py_DECREF(regs);
2259 return NULL;
2260 }
2261 PyTuple_SET_ITEM(regs, index, item);
2262 }
2263
2264 Py_INCREF(regs);
2265 self->regs = regs;
2266
2267 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002268}
2269
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002270static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002271match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002272{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002273#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002274 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002275 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002276
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002277 slots = 2 * (self->pattern->groups+1);
2278
2279 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2280 if (!copy)
2281 return NULL;
2282
2283 /* this value a constant, but any compiler should be able to
2284 figure that out all by itself */
2285 offset = offsetof(MatchObject, string);
2286
2287 Py_XINCREF(self->pattern);
2288 Py_XINCREF(self->string);
2289 Py_XINCREF(self->regs);
2290
2291 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002292 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002293
2294 return (PyObject*) copy;
2295#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002296 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002297 return NULL;
2298#endif
2299}
2300
2301static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002302match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002303{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002304#ifdef USE_BUILTIN_COPY
2305 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002306
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002307 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002308 if (!copy)
2309 return NULL;
2310
2311 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2312 !deepcopy(&copy->string, memo) ||
2313 !deepcopy(&copy->regs, memo)) {
2314 Py_DECREF(copy);
2315 return NULL;
2316 }
2317
2318#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002319 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2320 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002321#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002322}
2323
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002324PyDoc_STRVAR(match_doc,
2325"The result of re.match() and re.search().\n\
2326Match objects always have a boolean value of True.");
2327
2328PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002329"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002330 Return subgroup(s) of the match by indices or names.\n\
2331 For 0 returns the entire match.");
2332
2333PyDoc_STRVAR(match_start_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002334"start([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002335 Return index of the start of the substring matched by group.");
2336
2337PyDoc_STRVAR(match_end_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002338"end([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002339 Return index of the end of the substring matched by group.");
2340
2341PyDoc_STRVAR(match_span_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002342"span([group]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002343 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
2344
2345PyDoc_STRVAR(match_groups_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002346"groups([default=None]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002347 Return a tuple containing all the subgroups of the match, from 1.\n\
2348 The default argument is used for groups\n\
2349 that did not participate in the match");
2350
2351PyDoc_STRVAR(match_groupdict_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002352"groupdict([default=None]) -> dict.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002353 Return a dictionary containing all the named subgroups of the match,\n\
2354 keyed by the subgroup name. The default argument is used for groups\n\
2355 that did not participate in the match");
2356
2357PyDoc_STRVAR(match_expand_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002358"expand(template) -> str.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002359 Return the string obtained by doing backslash substitution\n\
2360 on the string template, as done by the sub() method.");
2361
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002362static PyMethodDef match_methods[] = {
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002363 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2364 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
2365 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
2366 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
2367 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
2368 match_groups_doc},
2369 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
2370 match_groupdict_doc},
2371 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002372 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
2373 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002374 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002375};
2376
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002377static PyObject *
2378match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002379{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002380 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002381 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002382 Py_INCREF(Py_None);
2383 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002384}
2385
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002386static PyObject *
2387match_lastgroup_get(MatchObject *self)
2388{
2389 if (self->pattern->indexgroup && self->lastindex >= 0) {
2390 PyObject* result = PySequence_GetItem(
2391 self->pattern->indexgroup, self->lastindex
2392 );
2393 if (result)
2394 return result;
2395 PyErr_Clear();
2396 }
2397 Py_INCREF(Py_None);
2398 return Py_None;
2399}
2400
2401static PyObject *
2402match_regs_get(MatchObject *self)
2403{
2404 if (self->regs) {
2405 Py_INCREF(self->regs);
2406 return self->regs;
2407 } else
2408 return match_regs(self);
2409}
2410
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002411static PyObject *
2412match_repr(MatchObject *self)
2413{
2414 PyObject *result;
2415 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2416 if (group0 == NULL)
2417 return NULL;
2418 result = PyUnicode_FromFormat(
2419 "<%s object; span=(%d, %d), match=%.50R>",
2420 Py_TYPE(self)->tp_name,
2421 self->mark[0], self->mark[1], group0);
2422 Py_DECREF(group0);
2423 return result;
2424}
2425
2426
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002427static PyGetSetDef match_getset[] = {
2428 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2429 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2430 {"regs", (getter)match_regs_get, (setter)NULL},
2431 {NULL}
2432};
2433
2434#define MATCH_OFF(x) offsetof(MatchObject, x)
2435static PyMemberDef match_members[] = {
2436 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2437 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2438 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2439 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2440 {NULL}
2441};
2442
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2445
Neal Norwitz57c179c2006-03-22 07:18:02 +00002446static PyTypeObject Match_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002447 PyVarObject_HEAD_INIT(NULL,0)
2448 "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002449 sizeof(MatchObject), sizeof(Py_ssize_t),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002450 (destructor)match_dealloc, /* tp_dealloc */
2451 0, /* tp_print */
2452 0, /* tp_getattr */
2453 0, /* tp_setattr */
2454 0, /* tp_reserved */
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002455 (reprfunc)match_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002456 0, /* tp_as_number */
2457 0, /* tp_as_sequence */
2458 0, /* tp_as_mapping */
2459 0, /* tp_hash */
2460 0, /* tp_call */
2461 0, /* tp_str */
2462 0, /* tp_getattro */
2463 0, /* tp_setattro */
2464 0, /* tp_as_buffer */
2465 Py_TPFLAGS_DEFAULT, /* tp_flags */
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002466 match_doc, /* tp_doc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002467 0, /* tp_traverse */
2468 0, /* tp_clear */
2469 0, /* tp_richcompare */
2470 0, /* tp_weaklistoffset */
2471 0, /* tp_iter */
2472 0, /* tp_iternext */
2473 match_methods, /* tp_methods */
2474 match_members, /* tp_members */
2475 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00002476};
2477
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002478static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002479pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002480{
2481 /* create match object (from state object) */
2482
2483 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002484 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002485 char* base;
2486 int n;
2487
2488 if (status > 0) {
2489
2490 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002491 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002492 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2493 2*(pattern->groups+1));
2494 if (!match)
2495 return NULL;
2496
2497 Py_INCREF(pattern);
2498 match->pattern = pattern;
2499
2500 Py_INCREF(state->string);
2501 match->string = state->string;
2502
2503 match->regs = NULL;
2504 match->groups = pattern->groups+1;
2505
2506 /* fill in group slices */
2507
2508 base = (char*) state->beginning;
2509 n = state->charsize;
2510
2511 match->mark[0] = ((char*) state->start - base) / n;
2512 match->mark[1] = ((char*) state->ptr - base) / n;
2513
2514 for (i = j = 0; i < pattern->groups; i++, j+=2)
2515 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2516 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2517 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2518 } else
2519 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2520
2521 match->pos = state->pos;
2522 match->endpos = state->endpos;
2523
2524 match->lastindex = state->lastindex;
2525
2526 return (PyObject*) match;
2527
2528 } else if (status == 0) {
2529
2530 /* no match */
2531 Py_INCREF(Py_None);
2532 return Py_None;
2533
2534 }
2535
2536 /* internal error */
2537 pattern_error(status);
2538 return NULL;
2539}
2540
2541
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002542/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002543/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002544
2545static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002546scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002547{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002548 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002549 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002550 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002551}
2552
2553static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002554scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002555{
2556 SRE_STATE* state = &self->state;
2557 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002558 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002559
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002560 state_reset(state);
2561
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002562 state->ptr = state->start;
2563
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002564 status = sre_match(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002565 if (PyErr_Occurred())
2566 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002567
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002568 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002569 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002570
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002571 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002572 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002573 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002574 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002575
2576 return match;
2577}
2578
2579
2580static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002581scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002582{
2583 SRE_STATE* state = &self->state;
2584 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002585 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002586
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002587 state_reset(state);
2588
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002589 state->ptr = state->start;
2590
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002591 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002592 if (PyErr_Occurred())
2593 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002594
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002595 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002596 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002597
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002598 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002599 state->start = (void*) ((char*) state->ptr + state->charsize);
2600 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002601 state->start = state->ptr;
2602
2603 return match;
2604}
2605
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002606static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002607 {"match", (PyCFunction) scanner_match, METH_NOARGS},
2608 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002609 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002610};
2611
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002612#define SCAN_OFF(x) offsetof(ScannerObject, x)
2613static PyMemberDef scanner_members[] = {
Ezio Melotti7c8c1ea2011-09-29 01:00:19 +03002614 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002615 {NULL} /* Sentinel */
2616};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002617
Neal Norwitz57c179c2006-03-22 07:18:02 +00002618static PyTypeObject Scanner_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002619 PyVarObject_HEAD_INIT(NULL, 0)
2620 "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002621 sizeof(ScannerObject), 0,
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002622 (destructor)scanner_dealloc,/* tp_dealloc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002623 0, /* tp_print */
2624 0, /* tp_getattr */
2625 0, /* tp_setattr */
2626 0, /* tp_reserved */
2627 0, /* tp_repr */
2628 0, /* tp_as_number */
2629 0, /* tp_as_sequence */
2630 0, /* tp_as_mapping */
2631 0, /* tp_hash */
2632 0, /* tp_call */
2633 0, /* tp_str */
2634 0, /* tp_getattro */
2635 0, /* tp_setattro */
2636 0, /* tp_as_buffer */
2637 Py_TPFLAGS_DEFAULT, /* tp_flags */
2638 0, /* tp_doc */
2639 0, /* tp_traverse */
2640 0, /* tp_clear */
2641 0, /* tp_richcompare */
2642 0, /* tp_weaklistoffset */
2643 0, /* tp_iter */
2644 0, /* tp_iternext */
2645 scanner_methods, /* tp_methods */
2646 scanner_members, /* tp_members */
2647 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002648};
2649
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002650static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002651pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kw)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002652{
2653 /* create search state object */
2654
2655 ScannerObject* self;
2656
2657 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002658 Py_ssize_t start = 0;
2659 Py_ssize_t end = PY_SSIZE_T_MAX;
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002660 static char* kwlist[] = { "source", "pos", "endpos", NULL };
2661 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:scanner", kwlist,
2662 &string, &start, &end))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002663 return NULL;
2664
2665 /* create scanner object */
2666 self = PyObject_NEW(ScannerObject, &Scanner_Type);
2667 if (!self)
2668 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002669 self->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002670
2671 string = state_init(&self->state, pattern, string, start, end);
2672 if (!string) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002673 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002674 return NULL;
2675 }
2676
2677 Py_INCREF(pattern);
2678 self->pattern = (PyObject*) pattern;
2679
2680 return (PyObject*) self;
2681}
2682
Guido van Rossumb700df92000-03-31 14:59:30 +00002683static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002684 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002685 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00002686 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002687 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002688};
2689
Martin v. Löwis1a214512008-06-11 05:26:20 +00002690static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002691 PyModuleDef_HEAD_INIT,
2692 "_" SRE_MODULE,
2693 NULL,
2694 -1,
2695 _functions,
2696 NULL,
2697 NULL,
2698 NULL,
2699 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002700};
2701
2702PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002703{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002704 PyObject* m;
2705 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002706 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002707
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002708 /* Patch object types */
2709 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2710 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002711 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002712
Martin v. Löwis1a214512008-06-11 05:26:20 +00002713 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002714 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002715 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002716 d = PyModule_GetDict(m);
2717
Christian Heimes217cfd12007-12-02 14:31:20 +00002718 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002719 if (x) {
2720 PyDict_SetItemString(d, "MAGIC", x);
2721 Py_DECREF(x);
2722 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002723
Christian Heimes217cfd12007-12-02 14:31:20 +00002724 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002725 if (x) {
2726 PyDict_SetItemString(d, "CODESIZE", x);
2727 Py_DECREF(x);
2728 }
2729
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002730 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2731 if (x) {
2732 PyDict_SetItemString(d, "MAXREPEAT", x);
2733 Py_DECREF(x);
2734 }
2735
Neal Norwitzfe537132007-08-26 03:55:15 +00002736 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002737 if (x) {
2738 PyDict_SetItemString(d, "copyright", x);
2739 Py_DECREF(x);
2740 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002741 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002742}
2743
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002744/* vim:ts=4:sw=4:et
2745*/