blob: 300d883cf6155667d8c13826282825469da487ae [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
/* Engine version/copyright banner.  The string is never written to in
   this file, so it is const-qualified. */
static const char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
/* name of this module, minus the leading underscore; may be overridden
   by defining SRE_MODULE before this point */
#if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif

/* NOTE(review): presumably the name of the Python-level front-end module;
   its use is outside this section -- confirm */
#define SRE_PY_MODULE "re"

/* defining this one enables tracing */
#undef VERBOSE

/* -------------------------------------------------------------------- */
/* optional features */

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY

/* -------------------------------------------------------------------- */
72
/* LOCAL(type) declares a file-local helper returning `type`, using the
   cheapest calling convention the compiler offers. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
83
/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */

/* TRACE((fmt, args...)) prints via printf when VERBOSE is defined and
   expands to nothing otherwise.  The double parentheses let a whole
   printf argument list travel as one macro argument. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
96
/* -------------------------------------------------------------------- */
/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000099
/* default character predicates (run sre_chars.py to regenerate tables) */

#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII. create tables in init_sre() instead */

/* Per-character class bits (an OR of the SRE_*_MASK values) for code
   points 0..127, 16 entries per row.  Read-only, hence const. */
static const char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* Lower-case mapping for code points 0..127: identity except that
   'A'..'Z' (65..90) map to 'a'..'z' (97..122).  Read-only, hence const. */
static const char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

/* Each predicate yields a non-zero mask bit when `ch` (< 128) belongs to
   the class and 0 otherwise; code points >= 128 never match.  `ch` is
   evaluated more than once, so pass side-effect-free expressions only. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* Lower-case `ch` using the ASCII table; code points >= 128 are returned
   unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
}
143
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Lower-case `ch` with the C library's tolower() when it fits in one byte
   (< 256); larger code points are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}
154
/* unicode-specific character predicates; thin aliases for the
   Py_UNICODE_* classification macros */

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')

/* Lower-case `ch` via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER(ch);
}
167
Guido van Rossumb700df92000-03-31 14:59:30 +0000168LOCAL(int)
169sre_category(SRE_CODE category, unsigned int ch)
170{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000171 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000172
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000173 case SRE_CATEGORY_DIGIT:
174 return SRE_IS_DIGIT(ch);
175 case SRE_CATEGORY_NOT_DIGIT:
176 return !SRE_IS_DIGIT(ch);
177 case SRE_CATEGORY_SPACE:
178 return SRE_IS_SPACE(ch);
179 case SRE_CATEGORY_NOT_SPACE:
180 return !SRE_IS_SPACE(ch);
181 case SRE_CATEGORY_WORD:
182 return SRE_IS_WORD(ch);
183 case SRE_CATEGORY_NOT_WORD:
184 return !SRE_IS_WORD(ch);
185 case SRE_CATEGORY_LINEBREAK:
186 return SRE_IS_LINEBREAK(ch);
187 case SRE_CATEGORY_NOT_LINEBREAK:
188 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000189
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000190 case SRE_CATEGORY_LOC_WORD:
191 return SRE_LOC_IS_WORD(ch);
192 case SRE_CATEGORY_LOC_NOT_WORD:
193 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000194
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 case SRE_CATEGORY_UNI_DIGIT:
196 return SRE_UNI_IS_DIGIT(ch);
197 case SRE_CATEGORY_UNI_NOT_DIGIT:
198 return !SRE_UNI_IS_DIGIT(ch);
199 case SRE_CATEGORY_UNI_SPACE:
200 return SRE_UNI_IS_SPACE(ch);
201 case SRE_CATEGORY_UNI_NOT_SPACE:
202 return !SRE_UNI_IS_SPACE(ch);
203 case SRE_CATEGORY_UNI_WORD:
204 return SRE_UNI_IS_WORD(ch);
205 case SRE_CATEGORY_UNI_NOT_WORD:
206 return !SRE_UNI_IS_WORD(ch);
207 case SRE_CATEGORY_UNI_LINEBREAK:
208 return SRE_UNI_IS_LINEBREAK(ch);
209 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
210 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000211 }
212 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000213}
214
/* helpers */
216
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000217static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000218data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000219{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000220 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000222 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000223 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000224 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225}
226
227static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000228data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000229{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000230 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000231 minsize = state->data_stack_base+size;
232 cursize = state->data_stack_size;
233 if (cursize < minsize) {
234 void* stack;
235 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300236 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000237 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000238 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000239 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000240 return SRE_ERROR_MEMORY;
241 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000242 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000243 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000244 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000245 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000246}
247
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000248/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000249
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300250#define SRE_CHAR Py_UCS1
251#define SIZEOF_SRE_CHAR 1
252#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300253#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000254
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300255/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000256
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300257#define SRE_CHAR Py_UCS2
258#define SIZEOF_SRE_CHAR 2
259#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300260#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000261
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300262/* generate 32-bit unicode version */
263
264#define SRE_CHAR Py_UCS4
265#define SIZEOF_SRE_CHAR 4
266#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300267#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000268
/* -------------------------------------------------------------------- */
/* factories and destructors */
271
272/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100273static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600274static PyObject*pattern_scanner(PatternObject*, PyObject*, PyObject* kw);
Guido van Rossumb700df92000-03-31 14:59:30 +0000275
276static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000277sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +0000278{
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100279 return PyLong_FromSize_t(sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +0000280}
281
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000282static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +0000283sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000284{
285 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000286 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000287 return NULL;
288 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000289 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000290 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000291 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundhb389df32000-06-29 12:48:37 +0000292 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000293}
294
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000295LOCAL(void)
296state_reset(SRE_STATE* state)
297{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000298 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000299 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000300
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000301 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000302 state->lastindex = -1;
303
304 state->repeat = NULL;
305
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000306 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307}
308
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000309static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200310getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300311 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600312 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000313{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000314 /* given a python object, return a data pointer, a length (in
315 characters), and a character size. return NULL if the object
316 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000317
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000318 /* Unicode objects do not support the buffer API. So, get the data
319 directly instead. */
320 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200321 if (PyUnicode_READY(string) == -1)
322 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200323 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200324 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300325 *p_isbytes = 0;
326 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000327 }
328
Victor Stinner0058b862011-09-29 03:27:47 +0200329 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300330 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
331 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
332 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000333 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000334
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300335 *p_length = view->len;
336 *p_charsize = 1;
337 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000338
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300339 if (view->buf == NULL) {
340 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
341 PyBuffer_Release(view);
342 view->buf = NULL;
343 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000344 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300345 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000346}
347
348LOCAL(PyObject*)
349state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000350 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000351{
352 /* prepare state object */
353
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000354 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300355 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000356 void* ptr;
357
358 memset(state, 0, sizeof(SRE_STATE));
359
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000360 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000361 state->lastindex = -1;
362
Benjamin Petersone48944b2012-03-07 14:50:25 -0600363 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300364 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000365 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600366 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000367
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300368 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600369 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300370 "can't use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600371 goto err;
372 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300373 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600374 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300375 "can't use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600376 goto err;
377 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000378
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000379 /* adjust boundaries */
380 if (start < 0)
381 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000382 else if (start > length)
383 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000384
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000385 if (end < 0)
386 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000387 else if (end > length)
388 end = length;
389
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300390 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000391 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000395 state->start = (void*) ((char*) ptr + start * state->charsize);
396 state->end = (void*) ((char*) ptr + end * state->charsize);
397
398 Py_INCREF(string);
399 state->string = string;
400 state->pos = start;
401 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000402
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000403 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000404 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000405 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000406 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000407 else
Fredrik Lundhb389df32000-06-29 12:48:37 +0000408 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000409
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000410 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600411 err:
412 if (state->buffer.buf)
413 PyBuffer_Release(&state->buffer);
414 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000415}
416
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000417LOCAL(void)
418state_fini(SRE_STATE* state)
419{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600420 if (state->buffer.buf)
421 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000422 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000423 data_stack_dealloc(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000424}
425
/* calculate offset from start of string: converts the pointer `member`
   into a character index by dividing its byte distance from
   state->beginning by state->charsize */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
429
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000430LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300431getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300432 PyObject* string, Py_ssize_t start, Py_ssize_t end)
433{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300434 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300435 if (PyBytes_CheckExact(string) &&
436 start == 0 && end == PyBytes_GET_SIZE(string)) {
437 Py_INCREF(string);
438 return string;
439 }
440 return PyBytes_FromStringAndSize(
441 (const char *)ptr + start, end - start);
442 }
443 else {
444 return PyUnicode_Substring(string, start, end);
445 }
446}
447
448LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000449state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000450{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000451 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000452
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000453 index = (index - 1) * 2;
454
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000455 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000456 if (empty)
457 /* want empty string */
458 i = j = 0;
459 else {
460 Py_INCREF(Py_None);
461 return Py_None;
462 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000463 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000464 i = STATE_OFFSET(state, state->mark[index]);
465 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000466 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000467
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300468 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000469}
470
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000471static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100472pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000473{
474 switch (status) {
475 case SRE_ERROR_RECURSION_LIMIT:
476 PyErr_SetString(
477 PyExc_RuntimeError,
478 "maximum recursion limit exceeded"
479 );
480 break;
481 case SRE_ERROR_MEMORY:
482 PyErr_NoMemory();
483 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000484 case SRE_ERROR_INTERRUPTED:
485 /* An exception has already been raised, so let it fly */
486 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000487 default:
488 /* other error codes indicate compiler/engine bugs */
489 PyErr_SetString(
490 PyExc_RuntimeError,
491 "internal error in regular expression engine"
492 );
493 }
494}
495
Guido van Rossumb700df92000-03-31 14:59:30 +0000496static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000497pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000498{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000499 if (self->weakreflist != NULL)
500 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000501 Py_XDECREF(self->pattern);
502 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000503 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000504 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000505}
506
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300507LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300508sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300509{
510 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300511 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300512 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300513 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300514 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300515 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300516}
517
518LOCAL(Py_ssize_t)
519sre_search(SRE_STATE* state, SRE_CODE* pattern)
520{
521 if (state->charsize == 1)
522 return sre_ucs1_search(state, pattern);
523 if (state->charsize == 2)
524 return sre_ucs2_search(state, pattern);
525 assert(state->charsize == 4);
526 return sre_ucs4_search(state, pattern);
527}
528
Larry Hastings16c51912014-01-07 11:53:01 -0800529static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200530fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
531{
532 if (string2 != NULL) {
533 if (string != NULL) {
534 PyErr_Format(PyExc_TypeError,
535 "Argument given by name ('%s') and position (1)",
536 oldname);
537 return NULL;
538 }
539 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
540 "The '%s' keyword parameter name is deprecated. "
541 "Use 'string' instead.", oldname) < 0)
542 return NULL;
543 return string2;
544 }
545 if (string == NULL) {
546 PyErr_SetString(PyExc_TypeError,
547 "Required argument 'string' (pos 1) not found");
548 return NULL;
549 }
550 return string;
551}
Larry Hastings16c51912014-01-07 11:53:01 -0800552
553static PyObject *
Larry Hastings5c661892014-01-24 06:17:25 -0800554pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
Larry Hastings16c51912014-01-07 11:53:01 -0800555{
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200556 static char *_keywords[] = {"string", "pos", "endpos", "pattern", NULL};
557 PyObject *string = NULL;
Larry Hastings16c51912014-01-07 11:53:01 -0800558 Py_ssize_t pos = 0;
559 Py_ssize_t endpos = PY_SSIZE_T_MAX;
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200560 PyObject *pattern = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000561 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100562 Py_ssize_t status;
Guido van Rossumb700df92000-03-31 14:59:30 +0000563
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200564 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
565 "|Onn$O:match", _keywords,
566 &string, &pos, &endpos, &pattern))
567 return NULL;
568 string = fix_string_param(string, pattern, "pattern");
569 if (!string)
570 return NULL;
571 string = state_init(&state, (PatternObject *)self, string, pos, endpos);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000572 if (!string)
573 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000574
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 state.ptr = state.start;
576
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000577 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
578
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300579 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000580
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000581 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Thomas Wouters89f507f2006-12-13 04:49:30 +0000582 if (PyErr_Occurred())
583 return NULL;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000584
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000585 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000586
Larry Hastings16c51912014-01-07 11:53:01 -0800587 return (PyObject *)pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000588}
589
590static PyObject*
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200591pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
592{
593 SRE_STATE state;
594 Py_ssize_t status;
595
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200596 PyObject *string = NULL, *string2 = NULL;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200597 Py_ssize_t start = 0;
598 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200599 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200600 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:fullmatch", kwlist,
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200601 &string, &start, &end, &string2))
602 return NULL;
603
604 string = fix_string_param(string, string2, "pattern");
605 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200606 return NULL;
607
608 string = state_init(&state, self, string, start, end);
609 if (!string)
610 return NULL;
611
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200612 state.ptr = state.start;
613
614 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
615
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300616 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200617
618 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
619 if (PyErr_Occurred())
620 return NULL;
621
622 state_fini(&state);
623
624 return pattern_new_match(self, &state, status);
625}
626
627static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000628pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000629{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000630 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100631 Py_ssize_t status;
Guido van Rossumb700df92000-03-31 14:59:30 +0000632
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200633 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000634 Py_ssize_t start = 0;
635 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200636 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
637 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:search", kwlist,
638 &string, &start, &end, &string2))
639 return NULL;
640
641 string = fix_string_param(string, string2, "pattern");
642 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000643 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000644
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000645 string = state_init(&state, self, string, start, end);
646 if (!string)
647 return NULL;
648
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000649 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
650
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300651 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000652
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000653 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
654
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000655 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000656
Thomas Wouters89f507f2006-12-13 04:49:30 +0000657 if (PyErr_Occurred())
658 return NULL;
659
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000660 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000661}
662
663static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000664call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000665{
666 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000667 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000668 PyObject* func;
669 PyObject* result;
670
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000671 if (!args)
672 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000673 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000674 if (!name)
675 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000676 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000677 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000678 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000679 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000680 func = PyObject_GetAttrString(mod, function);
681 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000682 if (!func)
683 return NULL;
684 result = PyObject_CallObject(func, args);
685 Py_DECREF(func);
686 Py_DECREF(args);
687 return result;
688}
689
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
 *
 * On success the old reference held in *object is dropped, the copy is
 * stored in its place and 1 is returned.  On failure *object is left
 * untouched and 0 is returned.
 */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* call() receives the tuple directly; it also copes with a NULL tuple */
    PyObject* call_args = PyTuple_Pack(2, *object, memo);
    PyObject* duplicate = call("copy", "deepcopy", call_args);

    if (duplicate == NULL)
        return 0;

    Py_DECREF(*object);
    *object = duplicate;

    return 1; /* success */
}
#endif
709
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000710static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000711pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000712{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000713 SRE_STATE state;
714 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100715 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000716 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000717
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200718 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000719 Py_ssize_t start = 0;
720 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200721 static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
722 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:findall", kwlist,
723 &string, &start, &end, &string2))
724 return NULL;
725
726 string = fix_string_param(string, string2, "source");
727 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000728 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000729
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000730 string = state_init(&state, self, string, start, end);
731 if (!string)
732 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000733
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000734 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000735 if (!list) {
736 state_fini(&state);
737 return NULL;
738 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000739
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000740 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000741
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000742 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000743
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000744 state_reset(&state);
745
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 state.ptr = state.start;
747
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300748 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300749 if (PyErr_Occurred())
750 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000751
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000752 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000753 if (status == 0)
754 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000755 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000756 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 }
Tim Peters3d563502006-01-21 02:47:53 +0000758
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000759 /* don't bother to build a match object */
760 switch (self->groups) {
761 case 0:
762 b = STATE_OFFSET(&state, state.start);
763 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300764 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300765 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000766 if (!item)
767 goto error;
768 break;
769 case 1:
770 item = state_getslice(&state, 1, string, 1);
771 if (!item)
772 goto error;
773 break;
774 default:
775 item = PyTuple_New(self->groups);
776 if (!item)
777 goto error;
778 for (i = 0; i < self->groups; i++) {
779 PyObject* o = state_getslice(&state, i+1, string, 1);
780 if (!o) {
781 Py_DECREF(item);
782 goto error;
783 }
784 PyTuple_SET_ITEM(item, i, o);
785 }
786 break;
787 }
788
789 status = PyList_Append(list, item);
790 Py_DECREF(item);
791 if (status < 0)
792 goto error;
793
794 if (state.ptr == state.start)
795 state.start = (void*) ((char*) state.ptr + state.charsize);
796 else
797 state.start = state.ptr;
798
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000799 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000800
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000801 state_fini(&state);
802 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000803
804error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000805 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000806 state_fini(&state);
807 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000808
Guido van Rossumb700df92000-03-31 14:59:30 +0000809}
810
Fredrik Lundh703ce812001-10-24 22:16:30 +0000811static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600812pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
Fredrik Lundh703ce812001-10-24 22:16:30 +0000813{
814 PyObject* scanner;
815 PyObject* search;
816 PyObject* iterator;
817
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600818 scanner = pattern_scanner(pattern, args, kw);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000819 if (!scanner)
820 return NULL;
821
822 search = PyObject_GetAttrString(scanner, "search");
823 Py_DECREF(scanner);
824 if (!search)
825 return NULL;
826
827 iterator = PyCallIter_New(search, Py_None);
828 Py_DECREF(search);
829
830 return iterator;
831}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000832
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000833static PyObject*
834pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
835{
836 SRE_STATE state;
837 PyObject* list;
838 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100839 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000840 Py_ssize_t n;
841 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000842 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000843
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200844 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000845 Py_ssize_t maxsplit = 0;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200846 static char* kwlist[] = { "string", "maxsplit", "source", NULL };
847 if (!PyArg_ParseTupleAndKeywords(args, kw, "|On$O:split", kwlist,
848 &string, &maxsplit, &string2))
849 return NULL;
850
851 string = fix_string_param(string, string2, "source");
852 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000853 return NULL;
854
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000855 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000856 if (!string)
857 return NULL;
858
859 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000860 if (!list) {
861 state_fini(&state);
862 return NULL;
863 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000864
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000865 n = 0;
866 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000867
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000868 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000869
870 state_reset(&state);
871
872 state.ptr = state.start;
873
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300874 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300875 if (PyErr_Occurred())
876 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000877
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000878 if (status <= 0) {
879 if (status == 0)
880 break;
881 pattern_error(status);
882 goto error;
883 }
Tim Peters3d563502006-01-21 02:47:53 +0000884
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000885 if (state.start == state.ptr) {
886 if (last == state.end)
887 break;
888 /* skip one character */
889 state.start = (void*) ((char*) state.ptr + state.charsize);
890 continue;
891 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000892
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000893 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300894 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000895 string, STATE_OFFSET(&state, last),
896 STATE_OFFSET(&state, state.start)
897 );
898 if (!item)
899 goto error;
900 status = PyList_Append(list, item);
901 Py_DECREF(item);
902 if (status < 0)
903 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000904
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000905 /* add groups (if any) */
906 for (i = 0; i < self->groups; i++) {
907 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000908 if (!item)
909 goto error;
910 status = PyList_Append(list, item);
911 Py_DECREF(item);
912 if (status < 0)
913 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000914 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000915
916 n = n + 1;
917
918 last = state.start = state.ptr;
919
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000920 }
921
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000922 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300923 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000924 string, STATE_OFFSET(&state, last), state.endpos
925 );
926 if (!item)
927 goto error;
928 status = PyList_Append(list, item);
929 Py_DECREF(item);
930 if (status < 0)
931 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000932
933 state_fini(&state);
934 return list;
935
936error:
937 Py_DECREF(list);
938 state_fini(&state);
939 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000940
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000941}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000942
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000943static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000944pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000945 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000946{
947 SRE_STATE state;
948 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300949 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000950 PyObject* item;
951 PyObject* filter;
952 PyObject* args;
953 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000954 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100955 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000956 Py_ssize_t n;
957 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300958 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000959 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600960 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000961
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000962 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000963 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000964 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000965 Py_INCREF(filter);
966 filter_is_callable = 1;
967 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000968 /* if not callable, check if it's a literal string */
969 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600970 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300971 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200972 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000973 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300974 if (charsize == 1)
975 literal = memchr(ptr, '\\', n) == NULL;
976 else
977 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000978 } else {
979 PyErr_Clear();
980 literal = 0;
981 }
Benjamin Petersone48944b2012-03-07 14:50:25 -0600982 if (view.buf)
983 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000984 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000985 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000986 Py_INCREF(filter);
987 filter_is_callable = 0;
988 } else {
989 /* not a literal; hand it over to the template compiler */
990 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +0000991 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000992 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000993 );
994 if (!filter)
995 return NULL;
996 filter_is_callable = PyCallable_Check(filter);
997 }
Fredrik Lundhdac58492001-10-21 21:48:30 +0000998 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000999
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001000 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001001 if (!string) {
1002 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001003 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001004 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001005
1006 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001007 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001008 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001009 state_fini(&state);
1010 return NULL;
1011 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001012
1013 n = i = 0;
1014
1015 while (!count || n < count) {
1016
1017 state_reset(&state);
1018
1019 state.ptr = state.start;
1020
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001021 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001022 if (PyErr_Occurred())
1023 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001024
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001025 if (status <= 0) {
1026 if (status == 0)
1027 break;
1028 pattern_error(status);
1029 goto error;
1030 }
Tim Peters3d563502006-01-21 02:47:53 +00001031
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001032 b = STATE_OFFSET(&state, state.start);
1033 e = STATE_OFFSET(&state, state.ptr);
1034
1035 if (i < b) {
1036 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001037 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001038 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001039 if (!item)
1040 goto error;
1041 status = PyList_Append(list, item);
1042 Py_DECREF(item);
1043 if (status < 0)
1044 goto error;
1045
1046 } else if (i == b && i == e && n > 0)
1047 /* ignore empty match on latest position */
1048 goto next;
1049
1050 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001051 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001052 match = pattern_new_match(self, &state, 1);
1053 if (!match)
1054 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001055 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001056 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001057 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001058 goto error;
1059 }
1060 item = PyObject_CallObject(filter, args);
1061 Py_DECREF(args);
1062 Py_DECREF(match);
1063 if (!item)
1064 goto error;
1065 } else {
1066 /* filter is literal string */
1067 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001068 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001069 }
1070
1071 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001072 if (item != Py_None) {
1073 status = PyList_Append(list, item);
1074 Py_DECREF(item);
1075 if (status < 0)
1076 goto error;
1077 }
Tim Peters3d563502006-01-21 02:47:53 +00001078
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001079 i = e;
1080 n = n + 1;
1081
1082next:
1083 /* move on */
1084 if (state.ptr == state.start)
1085 state.start = (void*) ((char*) state.ptr + state.charsize);
1086 else
1087 state.start = state.ptr;
1088
1089 }
1090
1091 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001092 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001093 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001094 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001095 if (!item)
1096 goto error;
1097 status = PyList_Append(list, item);
1098 Py_DECREF(item);
1099 if (status < 0)
1100 goto error;
1101 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001102
1103 state_fini(&state);
1104
Guido van Rossum4e173842001-12-07 04:25:10 +00001105 Py_DECREF(filter);
1106
Fredrik Lundhdac58492001-10-21 21:48:30 +00001107 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001108 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001109 if (!joiner) {
1110 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001111 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001112 }
1113 if (PyList_GET_SIZE(list) == 0) {
1114 Py_DECREF(list);
1115 item = joiner;
1116 }
1117 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001118 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001119 item = _PyBytes_Join(joiner, list);
1120 else
1121 item = PyUnicode_Join(joiner, list);
1122 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001123 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001124 if (!item)
1125 return NULL;
1126 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001127
1128 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001129 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001130
1131 return item;
1132
1133error:
1134 Py_DECREF(list);
1135 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001136 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001137 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001138
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001139}
1140
1141static PyObject*
1142pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
1143{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001144 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001145 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001146 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001147 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001148 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001149 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001150 return NULL;
1151
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001152 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001153}
1154
1155static PyObject*
1156pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
1157{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001158 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001159 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001160 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001161 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001162 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001163 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001164 return NULL;
1165
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001166 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001167}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001168
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001169static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001170pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001171{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001172#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001173 PatternObject* copy;
1174 int offset;
1175
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001176 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1177 if (!copy)
1178 return NULL;
1179
1180 offset = offsetof(PatternObject, groups);
1181
1182 Py_XINCREF(self->groupindex);
1183 Py_XINCREF(self->indexgroup);
1184 Py_XINCREF(self->pattern);
1185
1186 memcpy((char*) copy + offset, (char*) self + offset,
1187 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001188 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001189
1190 return (PyObject*) copy;
1191#else
1192 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1193 return NULL;
1194#endif
1195}
1196
1197static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001198pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001199{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001200#ifdef USE_BUILTIN_COPY
1201 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001202
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001203 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001204 if (!copy)
1205 return NULL;
1206
1207 if (!deepcopy(&copy->groupindex, memo) ||
1208 !deepcopy(&copy->indexgroup, memo) ||
1209 !deepcopy(&copy->pattern, memo)) {
1210 Py_DECREF(copy);
1211 return NULL;
1212 }
1213
1214#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001215 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1216 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001217#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001218}
1219
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001220static PyObject *
1221pattern_repr(PatternObject *obj)
1222{
1223 static const struct {
1224 const char *name;
1225 int value;
1226 } flag_names[] = {
1227 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1228 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1229 {"re.LOCALE", SRE_FLAG_LOCALE},
1230 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1231 {"re.DOTALL", SRE_FLAG_DOTALL},
1232 {"re.UNICODE", SRE_FLAG_UNICODE},
1233 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1234 {"re.DEBUG", SRE_FLAG_DEBUG},
1235 {"re.ASCII", SRE_FLAG_ASCII},
1236 };
1237 PyObject *result = NULL;
1238 PyObject *flag_items;
1239 int i;
1240 int flags = obj->flags;
1241
1242 /* Omit re.UNICODE for valid string patterns. */
1243 if (obj->isbytes == 0 &&
1244 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1245 SRE_FLAG_UNICODE)
1246 flags &= ~SRE_FLAG_UNICODE;
1247
1248 flag_items = PyList_New(0);
1249 if (!flag_items)
1250 return NULL;
1251
1252 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1253 if (flags & flag_names[i].value) {
1254 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1255 if (!item)
1256 goto done;
1257
1258 if (PyList_Append(flag_items, item) < 0) {
1259 Py_DECREF(item);
1260 goto done;
1261 }
1262 Py_DECREF(item);
1263 flags &= ~flag_names[i].value;
1264 }
1265 }
1266 if (flags) {
1267 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1268 if (!item)
1269 goto done;
1270
1271 if (PyList_Append(flag_items, item) < 0) {
1272 Py_DECREF(item);
1273 goto done;
1274 }
1275 Py_DECREF(item);
1276 }
1277
1278 if (PyList_Size(flag_items) > 0) {
1279 PyObject *flags_result;
1280 PyObject *sep = PyUnicode_FromString("|");
1281 if (!sep)
1282 goto done;
1283 flags_result = PyUnicode_Join(sep, flag_items);
1284 Py_DECREF(sep);
1285 if (!flags_result)
1286 goto done;
1287 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1288 obj->pattern, flags_result);
1289 Py_DECREF(flags_result);
1290 }
1291 else {
1292 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1293 }
1294
1295done:
1296 Py_DECREF(flag_items);
1297 return result;
1298}
1299
/* Docstrings for pattern objects: one per documented method (attached
   through the pattern_methods table) plus pattern_doc for the type
   itself (used as tp_doc of Pattern_Type). */
PyDoc_STRVAR(pattern_match_doc,
"match(string[, pos[, endpos]]) -> match object or None.\n\
 Matches zero or more characters at the beginning of the string");

PyDoc_STRVAR(pattern_fullmatch_doc,
"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
 Matches against all of the string");

PyDoc_STRVAR(pattern_search_doc,
"search(string[, pos[, endpos]]) -> match object or None.\n\
 Scan through string looking for a match, and return a corresponding\n\
 match object instance. Return None if no position in the string matches.");

PyDoc_STRVAR(pattern_split_doc,
"split(string[, maxsplit = 0]) -> list.\n\
 Split string by the occurrences of pattern.");

PyDoc_STRVAR(pattern_findall_doc,
"findall(string[, pos[, endpos]]) -> list.\n\
 Return a list of all non-overlapping matches of pattern in string.");

PyDoc_STRVAR(pattern_finditer_doc,
"finditer(string[, pos[, endpos]]) -> iterator.\n\
 Return an iterator over all non-overlapping matches for the \n\
 RE pattern in string. For each match, the iterator returns a\n\
 match object.");

PyDoc_STRVAR(pattern_sub_doc,
"sub(repl, string[, count = 0]) -> newstring.\n\
 Return the string obtained by replacing the leftmost non-overlapping\n\
 occurrences of pattern in string by the replacement repl.");

PyDoc_STRVAR(pattern_subn_doc,
"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\
 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
 the leftmost non-overlapping occurrences of pattern with the\n\
 replacement repl.");

PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1339
/* Method table for pattern objects (installed as tp_methods of
   Pattern_Type).  All entries up to "scanner" accept positional and
   keyword arguments; "scanner", "__copy__" and "__deepcopy__" carry no
   docstring. */
static PyMethodDef pattern_methods[] = {
    {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
     pattern_match_doc},
    {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
     pattern_fullmatch_doc},
    {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
     pattern_search_doc},
    {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
     pattern_sub_doc},
    {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
     pattern_subn_doc},
    {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
     pattern_split_doc},
    {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
     pattern_findall_doc},
    {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
     pattern_finditer_doc},
    {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
    {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
    {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
    {NULL, NULL} /* sentinel */
};
1362
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00001363#define PAT_OFF(x) offsetof(PatternObject, x)
1364static PyMemberDef pattern_members[] = {
1365 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
1366 {"flags", T_INT, PAT_OFF(flags), READONLY},
1367 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
1368 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
1369 {NULL} /* Sentinel */
1370};
Guido van Rossumb700df92000-03-31 14:59:30 +00001371
Neal Norwitz57c179c2006-03-22 07:18:02 +00001372static PyTypeObject Pattern_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001373 PyVarObject_HEAD_INIT(NULL, 0)
1374 "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001375 sizeof(PatternObject), sizeof(SRE_CODE),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001376 (destructor)pattern_dealloc, /* tp_dealloc */
1377 0, /* tp_print */
1378 0, /* tp_getattr */
1379 0, /* tp_setattr */
1380 0, /* tp_reserved */
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001381 (reprfunc)pattern_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001382 0, /* tp_as_number */
1383 0, /* tp_as_sequence */
1384 0, /* tp_as_mapping */
1385 0, /* tp_hash */
1386 0, /* tp_call */
1387 0, /* tp_str */
1388 0, /* tp_getattro */
1389 0, /* tp_setattro */
1390 0, /* tp_as_buffer */
1391 Py_TPFLAGS_DEFAULT, /* tp_flags */
1392 pattern_doc, /* tp_doc */
1393 0, /* tp_traverse */
1394 0, /* tp_clear */
1395 0, /* tp_richcompare */
1396 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
1397 0, /* tp_iter */
1398 0, /* tp_iternext */
1399 pattern_methods, /* tp_methods */
1400 pattern_members, /* tp_members */
Guido van Rossumb700df92000-03-31 14:59:30 +00001401};
1402
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001403static int _validate(PatternObject *self); /* Forward */
1404
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001405static PyObject *
1406_compile(PyObject* self_, PyObject* args)
1407{
1408 /* "compile" pattern descriptor to pattern object */
1409
1410 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001411 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001412
1413 PyObject* pattern;
1414 int flags = 0;
1415 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001416 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001417 PyObject* groupindex = NULL;
1418 PyObject* indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001419
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001420 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001421 &PyList_Type, &code, &groups,
1422 &groupindex, &indexgroup))
1423 return NULL;
1424
1425 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001426 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001427 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1428 if (!self)
1429 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001430 self->weakreflist = NULL;
1431 self->pattern = NULL;
1432 self->groupindex = NULL;
1433 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001434
1435 self->codesize = n;
1436
1437 for (i = 0; i < n; i++) {
1438 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001439 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001440 self->code[i] = (SRE_CODE) value;
1441 if ((unsigned long) self->code[i] != value) {
1442 PyErr_SetString(PyExc_OverflowError,
1443 "regular expression code size limit exceeded");
1444 break;
1445 }
1446 }
1447
1448 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001449 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001450 return NULL;
1451 }
1452
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001453 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001454 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001455 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001456 else {
1457 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001458 int charsize;
1459 Py_buffer view;
1460 view.buf = NULL;
1461 if (!getstring(pattern, &p_length, &self->isbytes,
1462 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001463 Py_DECREF(self);
1464 return NULL;
1465 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001466 if (view.buf)
1467 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001468 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001469
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001470 Py_INCREF(pattern);
1471 self->pattern = pattern;
1472
1473 self->flags = flags;
1474
1475 self->groups = groups;
1476
1477 Py_XINCREF(groupindex);
1478 self->groupindex = groupindex;
1479
1480 Py_XINCREF(indexgroup);
1481 self->indexgroup = indexgroup;
1482
1483 self->weakreflist = NULL;
1484
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001485 if (!_validate(self)) {
1486 Py_DECREF(self);
1487 return NULL;
1488 }
1489
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001490 return (PyObject*) self;
1491}
1492
Guido van Rossumb700df92000-03-31 14:59:30 +00001493/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001494/* Code validation */
1495
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1517
/* Define VVERBOSE (instead of undefining it) to trace the validator */
#undef VVERBOSE

/* Trace macro for the validator; v is a parenthesised printf argument
   list, so call sites use double parentheses */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: make the enclosing function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* The macros below read one word from the code array.  They expect the
   enclosing function to declare 'code' and 'end' plus the variable they
   assign ('op', 'arg' or 'skip'), and they FAIL when no word is left. */

/* Fetch the next word as an opcode into 'op' */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)

/* Fetch the next word as an argument into 'arg' */
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)

/* Fetch the next word as a skip count into 'skip', and FAIL unless
   skip-adj stays within the words remaining from the skip word itself
   up to 'end'.  'code' is advanced past the skip word afterwards. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)

/* The common case: no adjustment of the skip count */
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001558
1559static int
1560_validate_charset(SRE_CODE *code, SRE_CODE *end)
1561{
1562 /* Some variables are manipulated by the macros above */
1563 SRE_CODE op;
1564 SRE_CODE arg;
1565 SRE_CODE offset;
1566 int i;
1567
1568 while (code < end) {
1569 GET_OP;
1570 switch (op) {
1571
1572 case SRE_OP_NEGATE:
1573 break;
1574
1575 case SRE_OP_LITERAL:
1576 GET_ARG;
1577 break;
1578
1579 case SRE_OP_RANGE:
1580 GET_ARG;
1581 GET_ARG;
1582 break;
1583
1584 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001585 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001586 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001587 FAIL;
1588 code += offset;
1589 break;
1590
1591 case SRE_OP_BIGCHARSET:
1592 GET_ARG; /* Number of blocks */
1593 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001594 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001595 FAIL;
1596 /* Make sure that each byte points to a valid block */
1597 for (i = 0; i < 256; i++) {
1598 if (((unsigned char *)code)[i] >= arg)
1599 FAIL;
1600 }
1601 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001602 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001603 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001604 FAIL;
1605 code += offset;
1606 break;
1607
1608 case SRE_OP_CATEGORY:
1609 GET_ARG;
1610 switch (arg) {
1611 case SRE_CATEGORY_DIGIT:
1612 case SRE_CATEGORY_NOT_DIGIT:
1613 case SRE_CATEGORY_SPACE:
1614 case SRE_CATEGORY_NOT_SPACE:
1615 case SRE_CATEGORY_WORD:
1616 case SRE_CATEGORY_NOT_WORD:
1617 case SRE_CATEGORY_LINEBREAK:
1618 case SRE_CATEGORY_NOT_LINEBREAK:
1619 case SRE_CATEGORY_LOC_WORD:
1620 case SRE_CATEGORY_LOC_NOT_WORD:
1621 case SRE_CATEGORY_UNI_DIGIT:
1622 case SRE_CATEGORY_UNI_NOT_DIGIT:
1623 case SRE_CATEGORY_UNI_SPACE:
1624 case SRE_CATEGORY_UNI_NOT_SPACE:
1625 case SRE_CATEGORY_UNI_WORD:
1626 case SRE_CATEGORY_UNI_NOT_WORD:
1627 case SRE_CATEGORY_UNI_LINEBREAK:
1628 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1629 break;
1630 default:
1631 FAIL;
1632 }
1633 break;
1634
1635 default:
1636 FAIL;
1637
1638 }
1639 }
1640
1641 return 1;
1642}
1643
1644static int
1645_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1646{
1647 /* Some variables are manipulated by the macros above */
1648 SRE_CODE op;
1649 SRE_CODE arg;
1650 SRE_CODE skip;
1651
1652 VTRACE(("code=%p, end=%p\n", code, end));
1653
1654 if (code > end)
1655 FAIL;
1656
1657 while (code < end) {
1658 GET_OP;
1659 switch (op) {
1660
1661 case SRE_OP_MARK:
1662 /* We don't check whether marks are properly nested; the
1663 sre_match() code is robust even if they don't, and the worst
1664 you can get is nonsensical match results. */
1665 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001666 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001667 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1668 FAIL;
1669 }
1670 break;
1671
1672 case SRE_OP_LITERAL:
1673 case SRE_OP_NOT_LITERAL:
1674 case SRE_OP_LITERAL_IGNORE:
1675 case SRE_OP_NOT_LITERAL_IGNORE:
1676 GET_ARG;
1677 /* The arg is just a character, nothing to check */
1678 break;
1679
1680 case SRE_OP_SUCCESS:
1681 case SRE_OP_FAILURE:
1682 /* Nothing to check; these normally end the matching process */
1683 break;
1684
1685 case SRE_OP_AT:
1686 GET_ARG;
1687 switch (arg) {
1688 case SRE_AT_BEGINNING:
1689 case SRE_AT_BEGINNING_STRING:
1690 case SRE_AT_BEGINNING_LINE:
1691 case SRE_AT_END:
1692 case SRE_AT_END_LINE:
1693 case SRE_AT_END_STRING:
1694 case SRE_AT_BOUNDARY:
1695 case SRE_AT_NON_BOUNDARY:
1696 case SRE_AT_LOC_BOUNDARY:
1697 case SRE_AT_LOC_NON_BOUNDARY:
1698 case SRE_AT_UNI_BOUNDARY:
1699 case SRE_AT_UNI_NON_BOUNDARY:
1700 break;
1701 default:
1702 FAIL;
1703 }
1704 break;
1705
1706 case SRE_OP_ANY:
1707 case SRE_OP_ANY_ALL:
1708 /* These have no operands */
1709 break;
1710
1711 case SRE_OP_IN:
1712 case SRE_OP_IN_IGNORE:
1713 GET_SKIP;
1714 /* Stop 1 before the end; we check the FAILURE below */
1715 if (!_validate_charset(code, code+skip-2))
1716 FAIL;
1717 if (code[skip-2] != SRE_OP_FAILURE)
1718 FAIL;
1719 code += skip-1;
1720 break;
1721
1722 case SRE_OP_INFO:
1723 {
1724 /* A minimal info field is
1725 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1726 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1727 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001728 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001729 SRE_CODE *newcode;
1730 GET_SKIP;
1731 newcode = code+skip-1;
1732 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001733 GET_ARG;
1734 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001735 /* Check that only valid flags are present */
1736 if ((flags & ~(SRE_INFO_PREFIX |
1737 SRE_INFO_LITERAL |
1738 SRE_INFO_CHARSET)) != 0)
1739 FAIL;
1740 /* PREFIX and CHARSET are mutually exclusive */
1741 if ((flags & SRE_INFO_PREFIX) &&
1742 (flags & SRE_INFO_CHARSET))
1743 FAIL;
1744 /* LITERAL implies PREFIX */
1745 if ((flags & SRE_INFO_LITERAL) &&
1746 !(flags & SRE_INFO_PREFIX))
1747 FAIL;
1748 /* Validate the prefix */
1749 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001750 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001751 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001752 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001753 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001754 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001755 FAIL;
1756 code += prefix_len;
1757 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001758 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001759 FAIL;
1760 /* Each overlap value should be < prefix_len */
1761 for (i = 0; i < prefix_len; i++) {
1762 if (code[i] >= prefix_len)
1763 FAIL;
1764 }
1765 code += prefix_len;
1766 }
1767 /* Validate the charset */
1768 if (flags & SRE_INFO_CHARSET) {
1769 if (!_validate_charset(code, newcode-1))
1770 FAIL;
1771 if (newcode[-1] != SRE_OP_FAILURE)
1772 FAIL;
1773 code = newcode;
1774 }
1775 else if (code != newcode) {
1776 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1777 FAIL;
1778 }
1779 }
1780 break;
1781
1782 case SRE_OP_BRANCH:
1783 {
1784 SRE_CODE *target = NULL;
1785 for (;;) {
1786 GET_SKIP;
1787 if (skip == 0)
1788 break;
1789 /* Stop 2 before the end; we check the JUMP below */
1790 if (!_validate_inner(code, code+skip-3, groups))
1791 FAIL;
1792 code += skip-3;
1793 /* Check that it ends with a JUMP, and that each JUMP
1794 has the same target */
1795 GET_OP;
1796 if (op != SRE_OP_JUMP)
1797 FAIL;
1798 GET_SKIP;
1799 if (target == NULL)
1800 target = code+skip-1;
1801 else if (code+skip-1 != target)
1802 FAIL;
1803 }
1804 }
1805 break;
1806
1807 case SRE_OP_REPEAT_ONE:
1808 case SRE_OP_MIN_REPEAT_ONE:
1809 {
1810 SRE_CODE min, max;
1811 GET_SKIP;
1812 GET_ARG; min = arg;
1813 GET_ARG; max = arg;
1814 if (min > max)
1815 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001816 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001817 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001818 if (!_validate_inner(code, code+skip-4, groups))
1819 FAIL;
1820 code += skip-4;
1821 GET_OP;
1822 if (op != SRE_OP_SUCCESS)
1823 FAIL;
1824 }
1825 break;
1826
1827 case SRE_OP_REPEAT:
1828 {
1829 SRE_CODE min, max;
1830 GET_SKIP;
1831 GET_ARG; min = arg;
1832 GET_ARG; max = arg;
1833 if (min > max)
1834 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001835 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001836 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001837 if (!_validate_inner(code, code+skip-3, groups))
1838 FAIL;
1839 code += skip-3;
1840 GET_OP;
1841 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1842 FAIL;
1843 }
1844 break;
1845
1846 case SRE_OP_GROUPREF:
1847 case SRE_OP_GROUPREF_IGNORE:
1848 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001849 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001850 FAIL;
1851 break;
1852
1853 case SRE_OP_GROUPREF_EXISTS:
1854 /* The regex syntax for this is: '(?(group)then|else)', where
1855 'group' is either an integer group number or a group name,
1856 'then' and 'else' are sub-regexes, and 'else' is optional. */
1857 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001858 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001859 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001860 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001861 code--; /* The skip is relative to the first arg! */
1862 /* There are two possibilities here: if there is both a 'then'
1863 part and an 'else' part, the generated code looks like:
1864
1865 GROUPREF_EXISTS
1866 <group>
1867 <skipyes>
1868 ...then part...
1869 JUMP
1870 <skipno>
1871 (<skipyes> jumps here)
1872 ...else part...
1873 (<skipno> jumps here)
1874
1875 If there is only a 'then' part, it looks like:
1876
1877 GROUPREF_EXISTS
1878 <group>
1879 <skip>
1880 ...then part...
1881 (<skip> jumps here)
1882
1883 There is no direct way to decide which it is, and we don't want
1884 to allow arbitrary jumps anywhere in the code; so we just look
1885 for a JUMP opcode preceding our skip target.
1886 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001887 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001888 code[skip-3] == SRE_OP_JUMP)
1889 {
1890 VTRACE(("both then and else parts present\n"));
1891 if (!_validate_inner(code+1, code+skip-3, groups))
1892 FAIL;
1893 code += skip-2; /* Position after JUMP, at <skipno> */
1894 GET_SKIP;
1895 if (!_validate_inner(code, code+skip-1, groups))
1896 FAIL;
1897 code += skip-1;
1898 }
1899 else {
1900 VTRACE(("only a then part present\n"));
1901 if (!_validate_inner(code+1, code+skip-1, groups))
1902 FAIL;
1903 code += skip-1;
1904 }
1905 break;
1906
1907 case SRE_OP_ASSERT:
1908 case SRE_OP_ASSERT_NOT:
1909 GET_SKIP;
1910 GET_ARG; /* 0 for lookahead, width for lookbehind */
1911 code--; /* Back up over arg to simplify math below */
1912 if (arg & 0x80000000)
1913 FAIL; /* Width too large */
1914 /* Stop 1 before the end; we check the SUCCESS below */
1915 if (!_validate_inner(code+1, code+skip-2, groups))
1916 FAIL;
1917 code += skip-2;
1918 GET_OP;
1919 if (op != SRE_OP_SUCCESS)
1920 FAIL;
1921 break;
1922
1923 default:
1924 FAIL;
1925
1926 }
1927 }
1928
1929 VTRACE(("okay\n"));
1930 return 1;
1931}
1932
1933static int
1934_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1935{
1936 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
1937 FAIL;
1938 if (groups == 0) /* fix for simplejson */
1939 groups = 100; /* 100 groups should always be safe */
1940 return _validate_inner(code, end-1, groups);
1941}
1942
1943static int
1944_validate(PatternObject *self)
1945{
1946 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1947 {
1948 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1949 return 0;
1950 }
1951 else
1952 VTRACE(("Success!\n"));
1953 return 1;
1954}
1955
1956/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001957/* match methods */
1958
1959static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001960match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001961{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001962 Py_XDECREF(self->regs);
1963 Py_XDECREF(self->string);
1964 Py_DECREF(self->pattern);
1965 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001966}
1967
1968static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001969match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001970{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001971 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001972 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001973 Py_buffer view;
1974 PyObject *result;
1975 void* ptr;
1976
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001977 if (index < 0 || index >= self->groups) {
1978 /* raise IndexError if we were given a bad group number */
1979 PyErr_SetString(
1980 PyExc_IndexError,
1981 "no such group"
1982 );
1983 return NULL;
1984 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001985
Fredrik Lundh6f013982000-07-03 18:44:21 +00001986 index *= 2;
1987
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001988 if (self->string == Py_None || self->mark[index] < 0) {
1989 /* return default value if the string or group is undefined */
1990 Py_INCREF(def);
1991 return def;
1992 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001993
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001994 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001995 if (ptr == NULL)
1996 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001997 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001998 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001999 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002000 PyBuffer_Release(&view);
2001 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002002}
2003
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002004static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002005match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002006{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002007 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002008
Guido van Rossumddefaf32007-01-14 03:31:43 +00002009 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002010 /* Default value */
2011 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002012
Christian Heimes217cfd12007-12-02 14:31:20 +00002013 if (PyLong_Check(index))
2014 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002015
Fredrik Lundh6f013982000-07-03 18:44:21 +00002016 i = -1;
2017
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002018 if (self->pattern->groupindex) {
2019 index = PyObject_GetItem(self->pattern->groupindex, index);
2020 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002021 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002022 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002023 Py_DECREF(index);
2024 } else
2025 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002026 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002027
2028 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002029}
2030
2031static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002032match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002033{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002034 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002035}
2036
2037static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002038match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002039{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002040 /* delegate to Python code */
2041 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002042 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002043 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002044 );
2045}
2046
2047static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002048match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002049{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002050 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002051 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002052
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002053 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002054
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002055 switch (size) {
2056 case 0:
2057 result = match_getslice(self, Py_False, Py_None);
2058 break;
2059 case 1:
2060 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2061 break;
2062 default:
2063 /* fetch multiple items */
2064 result = PyTuple_New(size);
2065 if (!result)
2066 return NULL;
2067 for (i = 0; i < size; i++) {
2068 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002069 self, PyTuple_GET_ITEM(args, i), Py_None
2070 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002071 if (!item) {
2072 Py_DECREF(result);
2073 return NULL;
2074 }
2075 PyTuple_SET_ITEM(result, i, item);
2076 }
2077 break;
2078 }
2079 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002080}
2081
2082static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002083match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002084{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002085 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002086 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002087
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002088 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002089 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00002090 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002091 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002092
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002093 result = PyTuple_New(self->groups-1);
2094 if (!result)
2095 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002096
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002097 for (index = 1; index < self->groups; index++) {
2098 PyObject* item;
2099 item = match_getslice_by_index(self, index, def);
2100 if (!item) {
2101 Py_DECREF(result);
2102 return NULL;
2103 }
2104 PyTuple_SET_ITEM(result, index-1, item);
2105 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002106
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002107 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002108}
2109
2110static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002111match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002112{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002113 PyObject* result;
2114 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002115 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002116
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002117 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002118 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002119 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002120 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002121
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002122 result = PyDict_New();
2123 if (!result || !self->pattern->groupindex)
2124 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002125
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002126 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002127 if (!keys)
2128 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002129
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002130 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002131 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002132 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002133 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002134 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002135 if (!key)
2136 goto failed;
2137 value = match_getslice(self, key, def);
2138 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002139 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002140 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002141 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002142 status = PyDict_SetItem(result, key, value);
2143 Py_DECREF(value);
2144 if (status < 0)
2145 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002146 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002147
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002148 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002149
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002150 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002151
2152failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002153 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002154 Py_DECREF(result);
2155 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002156}
2157
2158static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002159match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002160{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002161 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002162
Guido van Rossumddefaf32007-01-14 03:31:43 +00002163 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002164 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002165 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002166
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002167 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002168
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002169 if (index < 0 || index >= self->groups) {
2170 PyErr_SetString(
2171 PyExc_IndexError,
2172 "no such group"
2173 );
2174 return NULL;
2175 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002176
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002177 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002178 return PyLong_FromSsize_t(self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002179}
2180
2181static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002182match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002183{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002184 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002185
Guido van Rossumddefaf32007-01-14 03:31:43 +00002186 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002187 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002188 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002189
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002190 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002191
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002192 if (index < 0 || index >= self->groups) {
2193 PyErr_SetString(
2194 PyExc_IndexError,
2195 "no such group"
2196 );
2197 return NULL;
2198 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002199
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002200 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002201 return PyLong_FromSsize_t(self->mark[index*2+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002202}
2203
2204LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002205_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002206{
2207 PyObject* pair;
2208 PyObject* item;
2209
2210 pair = PyTuple_New(2);
2211 if (!pair)
2212 return NULL;
2213
Christian Heimes217cfd12007-12-02 14:31:20 +00002214 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002215 if (!item)
2216 goto error;
2217 PyTuple_SET_ITEM(pair, 0, item);
2218
Christian Heimes217cfd12007-12-02 14:31:20 +00002219 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002220 if (!item)
2221 goto error;
2222 PyTuple_SET_ITEM(pair, 1, item);
2223
2224 return pair;
2225
2226 error:
2227 Py_DECREF(pair);
2228 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002229}
2230
2231static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002232match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002233{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002234 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002235
Guido van Rossumddefaf32007-01-14 03:31:43 +00002236 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002237 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002238 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002239
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002240 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002241
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002242 if (index < 0 || index >= self->groups) {
2243 PyErr_SetString(
2244 PyExc_IndexError,
2245 "no such group"
2246 );
2247 return NULL;
2248 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002249
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002250 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002251 return _pair(self->mark[index*2], self->mark[index*2+1]);
2252}
2253
2254static PyObject*
2255match_regs(MatchObject* self)
2256{
2257 PyObject* regs;
2258 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002259 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002260
2261 regs = PyTuple_New(self->groups);
2262 if (!regs)
2263 return NULL;
2264
2265 for (index = 0; index < self->groups; index++) {
2266 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2267 if (!item) {
2268 Py_DECREF(regs);
2269 return NULL;
2270 }
2271 PyTuple_SET_ITEM(regs, index, item);
2272 }
2273
2274 Py_INCREF(regs);
2275 self->regs = regs;
2276
2277 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002278}
2279
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002280static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002281match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002282{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002283#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002284 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002285 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002286
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002287 slots = 2 * (self->pattern->groups+1);
2288
2289 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2290 if (!copy)
2291 return NULL;
2292
2293 /* this value a constant, but any compiler should be able to
2294 figure that out all by itself */
2295 offset = offsetof(MatchObject, string);
2296
2297 Py_XINCREF(self->pattern);
2298 Py_XINCREF(self->string);
2299 Py_XINCREF(self->regs);
2300
2301 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002302 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002303
2304 return (PyObject*) copy;
2305#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002306 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002307 return NULL;
2308#endif
2309}
2310
2311static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002312match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002313{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002314#ifdef USE_BUILTIN_COPY
2315 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002316
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002317 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002318 if (!copy)
2319 return NULL;
2320
2321 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2322 !deepcopy(&copy->string, memo) ||
2323 !deepcopy(&copy->regs, memo)) {
2324 Py_DECREF(copy);
2325 return NULL;
2326 }
2327
2328#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002329 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2330 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002331#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002332}
2333
/* Docstrings for the match type and its methods.  match_doc is used as
   the tp_doc slot of Match_Type; the others are referenced from the
   match_methods table. */
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002334PyDoc_STRVAR(match_doc,
2335"The result of re.match() and re.search().\n\
2336Match objects always have a boolean value of True.");
2337
2338PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002339"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002340 Return subgroup(s) of the match by indices or names.\n\
2341 For 0 returns the entire match.");
2342
2343PyDoc_STRVAR(match_start_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002344"start([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002345 Return index of the start of the substring matched by group.");
2346
2347PyDoc_STRVAR(match_end_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002348"end([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002349 Return index of the end of the substring matched by group.");
2350
2351PyDoc_STRVAR(match_span_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002352"span([group]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002353 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
2354
2355PyDoc_STRVAR(match_groups_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002356"groups([default=None]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002357 Return a tuple containing all the subgroups of the match, from 1.\n\
2358 The default argument is used for groups\n\
2359 that did not participate in the match");
2360
2361PyDoc_STRVAR(match_groupdict_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002362"groupdict([default=None]) -> dict.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002363 Return a dictionary containing all the named subgroups of the match,\n\
2364 keyed by the subgroup name. The default argument is used for groups\n\
2365 that did not participate in the match");
2366
2367PyDoc_STRVAR(match_expand_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002368"expand(template) -> str.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002369 Return the string obtained by doing backslash substitution\n\
2370 on the string template, as done by the sub() method.");
2371
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002372static PyMethodDef match_methods[] = {
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002373 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2374 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
2375 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
2376 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
2377 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
2378 match_groups_doc},
2379 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
2380 match_groupdict_doc},
2381 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002382 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
2383 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002384 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002385};
2386
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002387static PyObject *
2388match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002389{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002390 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002391 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002392 Py_INCREF(Py_None);
2393 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002394}
2395
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002396static PyObject *
2397match_lastgroup_get(MatchObject *self)
2398{
2399 if (self->pattern->indexgroup && self->lastindex >= 0) {
2400 PyObject* result = PySequence_GetItem(
2401 self->pattern->indexgroup, self->lastindex
2402 );
2403 if (result)
2404 return result;
2405 PyErr_Clear();
2406 }
2407 Py_INCREF(Py_None);
2408 return Py_None;
2409}
2410
2411static PyObject *
2412match_regs_get(MatchObject *self)
2413{
2414 if (self->regs) {
2415 Py_INCREF(self->regs);
2416 return self->regs;
2417 } else
2418 return match_regs(self);
2419}
2420
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002421static PyObject *
2422match_repr(MatchObject *self)
2423{
2424 PyObject *result;
2425 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2426 if (group0 == NULL)
2427 return NULL;
2428 result = PyUnicode_FromFormat(
2429 "<%s object; span=(%d, %d), match=%.50R>",
2430 Py_TYPE(self)->tp_name,
2431 self->mark[0], self->mark[1], group0);
2432 Py_DECREF(group0);
2433 return result;
2434}
2435
2436
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002437static PyGetSetDef match_getset[] = {
2438 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2439 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2440 {"regs", (getter)match_regs_get, (setter)NULL},
2441 {NULL}
2442};
2443
2444#define MATCH_OFF(x) offsetof(MatchObject, x)
2445static PyMemberDef match_members[] = {
2446 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2447 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2448 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2449 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2450 {NULL}
2451};
2452
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2455
Neal Norwitz57c179c2006-03-22 07:18:02 +00002456static PyTypeObject Match_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002457 PyVarObject_HEAD_INIT(NULL,0)
2458 "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002459 sizeof(MatchObject), sizeof(Py_ssize_t),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002460 (destructor)match_dealloc, /* tp_dealloc */
2461 0, /* tp_print */
2462 0, /* tp_getattr */
2463 0, /* tp_setattr */
2464 0, /* tp_reserved */
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002465 (reprfunc)match_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002466 0, /* tp_as_number */
2467 0, /* tp_as_sequence */
2468 0, /* tp_as_mapping */
2469 0, /* tp_hash */
2470 0, /* tp_call */
2471 0, /* tp_str */
2472 0, /* tp_getattro */
2473 0, /* tp_setattro */
2474 0, /* tp_as_buffer */
2475 Py_TPFLAGS_DEFAULT, /* tp_flags */
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002476 match_doc, /* tp_doc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002477 0, /* tp_traverse */
2478 0, /* tp_clear */
2479 0, /* tp_richcompare */
2480 0, /* tp_weaklistoffset */
2481 0, /* tp_iter */
2482 0, /* tp_iternext */
2483 match_methods, /* tp_methods */
2484 match_members, /* tp_members */
2485 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00002486};
2487
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002488static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002489pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002490{
2491 /* create match object (from state object) */
2492
2493 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002494 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002495 char* base;
2496 int n;
2497
2498 if (status > 0) {
2499
2500 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002501 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002502 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2503 2*(pattern->groups+1));
2504 if (!match)
2505 return NULL;
2506
2507 Py_INCREF(pattern);
2508 match->pattern = pattern;
2509
2510 Py_INCREF(state->string);
2511 match->string = state->string;
2512
2513 match->regs = NULL;
2514 match->groups = pattern->groups+1;
2515
2516 /* fill in group slices */
2517
2518 base = (char*) state->beginning;
2519 n = state->charsize;
2520
2521 match->mark[0] = ((char*) state->start - base) / n;
2522 match->mark[1] = ((char*) state->ptr - base) / n;
2523
2524 for (i = j = 0; i < pattern->groups; i++, j+=2)
2525 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2526 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2527 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2528 } else
2529 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2530
2531 match->pos = state->pos;
2532 match->endpos = state->endpos;
2533
2534 match->lastindex = state->lastindex;
2535
2536 return (PyObject*) match;
2537
2538 } else if (status == 0) {
2539
2540 /* no match */
2541 Py_INCREF(Py_None);
2542 return Py_None;
2543
2544 }
2545
2546 /* internal error */
2547 pattern_error(status);
2548 return NULL;
2549}
2550
2551
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002552/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002553/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002554
2555static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002556scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002557{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002558 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002559 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002560 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002561}
2562
2563static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002564scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002565{
2566 SRE_STATE* state = &self->state;
2567 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002568 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002569
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002570 state_reset(state);
2571
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002572 state->ptr = state->start;
2573
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002574 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002575 if (PyErr_Occurred())
2576 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002577
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002578 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002579 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002580
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002581 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002582 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002583 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002584 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002585
2586 return match;
2587}
2588
2589
2590static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002591scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002592{
2593 SRE_STATE* state = &self->state;
2594 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002595 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002596
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002597 state_reset(state);
2598
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002599 state->ptr = state->start;
2600
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002601 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002602 if (PyErr_Occurred())
2603 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002604
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002605 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002606 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002607
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002608 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002609 state->start = (void*) ((char*) state->ptr + state->charsize);
2610 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002611 state->start = state->ptr;
2612
2613 return match;
2614}
2615
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002616static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002617 {"match", (PyCFunction) scanner_match, METH_NOARGS},
2618 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002619 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002620};
2621
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002622#define SCAN_OFF(x) offsetof(ScannerObject, x)
2623static PyMemberDef scanner_members[] = {
Ezio Melotti7c8c1ea2011-09-29 01:00:19 +03002624 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002625 {NULL} /* Sentinel */
2626};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002627
Neal Norwitz57c179c2006-03-22 07:18:02 +00002628static PyTypeObject Scanner_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002629 PyVarObject_HEAD_INIT(NULL, 0)
2630 "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002631 sizeof(ScannerObject), 0,
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002632 (destructor)scanner_dealloc,/* tp_dealloc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002633 0, /* tp_print */
2634 0, /* tp_getattr */
2635 0, /* tp_setattr */
2636 0, /* tp_reserved */
2637 0, /* tp_repr */
2638 0, /* tp_as_number */
2639 0, /* tp_as_sequence */
2640 0, /* tp_as_mapping */
2641 0, /* tp_hash */
2642 0, /* tp_call */
2643 0, /* tp_str */
2644 0, /* tp_getattro */
2645 0, /* tp_setattro */
2646 0, /* tp_as_buffer */
2647 Py_TPFLAGS_DEFAULT, /* tp_flags */
2648 0, /* tp_doc */
2649 0, /* tp_traverse */
2650 0, /* tp_clear */
2651 0, /* tp_richcompare */
2652 0, /* tp_weaklistoffset */
2653 0, /* tp_iter */
2654 0, /* tp_iternext */
2655 scanner_methods, /* tp_methods */
2656 scanner_members, /* tp_members */
2657 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002658};
2659
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002660static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002661pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kw)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002662{
2663 /* create search state object */
2664
2665 ScannerObject* self;
2666
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02002667 PyObject *string = NULL, *string2 = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002668 Py_ssize_t start = 0;
2669 Py_ssize_t end = PY_SSIZE_T_MAX;
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02002670 static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
2671 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:scanner", kwlist,
2672 &string, &start, &end, &string2))
2673 return NULL;
2674
2675 string = fix_string_param(string, string2, "source");
2676 if (!string)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002677 return NULL;
2678
2679 /* create scanner object */
2680 self = PyObject_NEW(ScannerObject, &Scanner_Type);
2681 if (!self)
2682 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002683 self->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002684
2685 string = state_init(&self->state, pattern, string, start, end);
2686 if (!string) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002687 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002688 return NULL;
2689 }
2690
2691 Py_INCREF(pattern);
2692 self->pattern = (PyObject*) pattern;
2693
2694 return (PyObject*) self;
2695}
2696
Guido van Rossumb700df92000-03-31 14:59:30 +00002697static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002698 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002699 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00002700 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002701 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002702};
2703
Martin v. Löwis1a214512008-06-11 05:26:20 +00002704static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002705 PyModuleDef_HEAD_INIT,
2706 "_" SRE_MODULE,
2707 NULL,
2708 -1,
2709 _functions,
2710 NULL,
2711 NULL,
2712 NULL,
2713 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002714};
2715
2716PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002717{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002718 PyObject* m;
2719 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002720 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002721
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002722 /* Patch object types */
2723 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2724 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002725 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002726
Martin v. Löwis1a214512008-06-11 05:26:20 +00002727 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002728 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002729 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002730 d = PyModule_GetDict(m);
2731
Christian Heimes217cfd12007-12-02 14:31:20 +00002732 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002733 if (x) {
2734 PyDict_SetItemString(d, "MAGIC", x);
2735 Py_DECREF(x);
2736 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002737
Christian Heimes217cfd12007-12-02 14:31:20 +00002738 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002739 if (x) {
2740 PyDict_SetItemString(d, "CODESIZE", x);
2741 Py_DECREF(x);
2742 }
2743
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002744 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2745 if (x) {
2746 PyDict_SetItemString(d, "MAXREPEAT", x);
2747 Py_DECREF(x);
2748 }
2749
Neal Norwitzfe537132007-08-26 03:55:15 +00002750 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002751 if (x) {
2752 PyDict_SetItemString(d, "copyright", x);
2753 Py_DECREF(x);
2754 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002755 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002756}
2757
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002758/* vim:ts=4:sw=4:et
2759*/