blob: 2d6961b3126e5cd7d3d6d03521f5c38eb5e85d09 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
36
/* version / copyright banner string for this engine */
static char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000039
Thomas Wouters0e3f5912006-08-11 14:57:12 +000040#define PY_SSIZE_T_CLEAN
41
Guido van Rossumb700df92000-03-31 14:59:30 +000042#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000043#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000044
45#include "sre.h"
46
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030047#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
48
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000049#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000050
/* name of this module, minus the leading underscore; a definition
   supplied before this point takes precedence */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

/* NOTE(review): presumably the Python-level module this file calls
   back into -- confirm against the users of SRE_PY_MODULE */
#define SRE_PY_MODULE "re"

/* tracing is off; define VERBOSE here instead to turn TRACE() on */
#undef VERBOSE

/* -------------------------------------------------------------------- */
/* optional features */

/* fast searching: enabled */
#define USE_FAST_SEARCH

/* copy/deepcopy handling (work in progress): disabled */
#undef USE_BUILTIN_COPY
69
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000070/* -------------------------------------------------------------------- */
71
/* LOCAL(type) introduces a file-private helper returning `type`.
   The exact storage/inline/calling-convention keywords depend on the
   compiler and on whether USE_INLINE is defined. */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
/* inlining explicitly requested */
#define LOCAL(type) static inline type
#else
/* plain file-scope function */
#define LOCAL(type) static type
#endif
82
/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */
#define SRE_ERROR_INTERRUPTED (-10) /* signal handler raised exception */

/* TRACE takes one parenthesised printf argument list, e.g.
   TRACE(("x=%d\n", x)); it expands to nothing unless VERBOSE is set */
#ifndef VERBOSE
#define TRACE(v)
#else
#define TRACE(v) printf v
#endif
95
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000096/* -------------------------------------------------------------------- */
97/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000098
/* default character predicates (run sre_chars.py to regenerate tables) */

/* bit flags stored for each character in sre_char_info[] */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII.  create tables in init_sre() instead */

/* flag byte for each of the 128 7-bit codes, sixteen entries per row */
static char sre_char_info[128] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /* 0x60 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* lower-case mapping for the 128 7-bit codes: codes 65..90 map to
   97..122, every other code maps to itself */
static char sre_char_lower[128] = {
    /* 0x00 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /* 0x40 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
               93, 94, 95,
    /* 0x60 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
               124, 125, 126, 127
};

/* shared body of the predicates below: yields the masked flag bits
   (not normalised to 0/1) for codes below 128, and 0 for anything else */
#define SRE_ASCII_FLAG(ch, mask)\
    ((ch) < 128 ? (sre_char_info[(ch)] & (mask)) : 0)

#define SRE_IS_DIGIT(ch) SRE_ASCII_FLAG(ch, SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch) SRE_ASCII_FLAG(ch, SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch) SRE_ASCII_FLAG(ch, SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch) SRE_ASCII_FLAG(ch, SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch) SRE_ASCII_FLAG(ch, SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000137
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000138static unsigned int sre_lower(unsigned int ch)
139{
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000140 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000141}
142
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */

/* isalnum() is consulted only for codes 0..255; anything larger is 0 */
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
/* word character: locale alphanumeric or underscore (result is 0 or 1) */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
148
/* Lower-case `ch` with the C library's tolower(); codes of 256 and
   above are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256)
        return (unsigned int)tolower((ch));
    return ch;
}
153
/* unicode-specific character predicates */

/* each predicate forwards to the corresponding Py_UNICODE_* macro */
#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
/* word character: unicode alphanumeric or underscore */
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000161
/* Lower-case `ch` with Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
166
Guido van Rossumb700df92000-03-31 14:59:30 +0000167LOCAL(int)
168sre_category(SRE_CODE category, unsigned int ch)
169{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000170 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000171
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000172 case SRE_CATEGORY_DIGIT:
173 return SRE_IS_DIGIT(ch);
174 case SRE_CATEGORY_NOT_DIGIT:
175 return !SRE_IS_DIGIT(ch);
176 case SRE_CATEGORY_SPACE:
177 return SRE_IS_SPACE(ch);
178 case SRE_CATEGORY_NOT_SPACE:
179 return !SRE_IS_SPACE(ch);
180 case SRE_CATEGORY_WORD:
181 return SRE_IS_WORD(ch);
182 case SRE_CATEGORY_NOT_WORD:
183 return !SRE_IS_WORD(ch);
184 case SRE_CATEGORY_LINEBREAK:
185 return SRE_IS_LINEBREAK(ch);
186 case SRE_CATEGORY_NOT_LINEBREAK:
187 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000188
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000189 case SRE_CATEGORY_LOC_WORD:
190 return SRE_LOC_IS_WORD(ch);
191 case SRE_CATEGORY_LOC_NOT_WORD:
192 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000193
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000194 case SRE_CATEGORY_UNI_DIGIT:
195 return SRE_UNI_IS_DIGIT(ch);
196 case SRE_CATEGORY_UNI_NOT_DIGIT:
197 return !SRE_UNI_IS_DIGIT(ch);
198 case SRE_CATEGORY_UNI_SPACE:
199 return SRE_UNI_IS_SPACE(ch);
200 case SRE_CATEGORY_UNI_NOT_SPACE:
201 return !SRE_UNI_IS_SPACE(ch);
202 case SRE_CATEGORY_UNI_WORD:
203 return SRE_UNI_IS_WORD(ch);
204 case SRE_CATEGORY_UNI_NOT_WORD:
205 return !SRE_UNI_IS_WORD(ch);
206 case SRE_CATEGORY_UNI_LINEBREAK:
207 return SRE_UNI_IS_LINEBREAK(ch);
208 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
209 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000210 }
211 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000212}
213
214/* helpers */
215
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000217data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000218{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000219 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000220 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000221 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000222 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000223 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000224}
225
226static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000227data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000228{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000229 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000230 minsize = state->data_stack_base+size;
231 cursize = state->data_stack_size;
232 if (cursize < minsize) {
233 void* stack;
234 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300235 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000236 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000237 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000238 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000239 return SRE_ERROR_MEMORY;
240 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000241 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000242 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000243 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000244 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000245}
246
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000247/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000248
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300249#define SRE_CHAR Py_UCS1
250#define SIZEOF_SRE_CHAR 1
251#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300252#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000253
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300254/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000255
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300256#define SRE_CHAR Py_UCS2
257#define SIZEOF_SRE_CHAR 2
258#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300259#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000260
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300261/* generate 32-bit unicode version */
262
263#define SRE_CHAR Py_UCS4
264#define SIZEOF_SRE_CHAR 4
265#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300266#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000267
268/* -------------------------------------------------------------------- */
269/* factories and destructors */
270
271/* see sre.h for object declarations */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000272static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600273static PyObject*pattern_scanner(PatternObject*, PyObject*, PyObject* kw);
Guido van Rossumb700df92000-03-31 14:59:30 +0000274
275static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000276sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +0000277{
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100278 return PyLong_FromSize_t(sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +0000279}
280
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000281static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +0000282sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000283{
284 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000285 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000286 return NULL;
287 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000288 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000289 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000290 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundhb389df32000-06-29 12:48:37 +0000291 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000292}
293
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000294LOCAL(void)
295state_reset(SRE_STATE* state)
296{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000297 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000298 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000299
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000300 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000301 state->lastindex = -1;
302
303 state->repeat = NULL;
304
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000305 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000306}
307
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000308static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200309getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300310 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600311 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000312{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000313 /* given a python object, return a data pointer, a length (in
314 characters), and a character size. return NULL if the object
315 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000316
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000317 /* Unicode objects do not support the buffer API. So, get the data
318 directly instead. */
319 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200320 if (PyUnicode_READY(string) == -1)
321 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200323 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300324 *p_isbytes = 0;
325 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000326 }
327
Victor Stinner0058b862011-09-29 03:27:47 +0200328 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300329 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
330 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
331 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000332 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000333
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300334 *p_length = view->len;
335 *p_charsize = 1;
336 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000337
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300338 if (view->buf == NULL) {
339 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
340 PyBuffer_Release(view);
341 view->buf = NULL;
342 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000343 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000345}
346
347LOCAL(PyObject*)
348state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000349 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000350{
351 /* prepare state object */
352
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000353 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300354 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000355 void* ptr;
356
357 memset(state, 0, sizeof(SRE_STATE));
358
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000359 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000360 state->lastindex = -1;
361
Benjamin Petersone48944b2012-03-07 14:50:25 -0600362 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300363 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000364 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600365 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000366
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300367 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600368 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300369 "can't use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600370 goto err;
371 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300372 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600373 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300374 "can't use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600375 goto err;
376 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000377
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000378 /* adjust boundaries */
379 if (start < 0)
380 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000381 else if (start > length)
382 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000383
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000384 if (end < 0)
385 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000386 else if (end > length)
387 end = length;
388
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300389 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000390 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000391
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000392 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000393
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000394 state->start = (void*) ((char*) ptr + start * state->charsize);
395 state->end = (void*) ((char*) ptr + end * state->charsize);
396
397 Py_INCREF(string);
398 state->string = string;
399 state->pos = start;
400 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000401
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000402 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000403 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000404 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000405 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000406 else
Fredrik Lundhb389df32000-06-29 12:48:37 +0000407 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600410 err:
411 if (state->buffer.buf)
412 PyBuffer_Release(&state->buffer);
413 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000414}
415
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000416LOCAL(void)
417state_fini(SRE_STATE* state)
418{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600419 if (state->buffer.buf)
420 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000421 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000422 data_stack_dealloc(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000423}
424
/* calculate offset from start of string: the byte distance between
   `member` and the state's `beginning`, divided by the character size */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning)\
     / (state)->charsize)
428
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000429LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300430getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300431 PyObject* string, Py_ssize_t start, Py_ssize_t end)
432{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300433 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300434 if (PyBytes_CheckExact(string) &&
435 start == 0 && end == PyBytes_GET_SIZE(string)) {
436 Py_INCREF(string);
437 return string;
438 }
439 return PyBytes_FromStringAndSize(
440 (const char *)ptr + start, end - start);
441 }
442 else {
443 return PyUnicode_Substring(string, start, end);
444 }
445}
446
447LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000448state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000449{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000450 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000451
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000452 index = (index - 1) * 2;
453
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000454 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000455 if (empty)
456 /* want empty string */
457 i = j = 0;
458 else {
459 Py_INCREF(Py_None);
460 return Py_None;
461 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000462 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000463 i = STATE_OFFSET(state, state->mark[index]);
464 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000465 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000466
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300467 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000468}
469
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000470static void
471pattern_error(int status)
472{
473 switch (status) {
474 case SRE_ERROR_RECURSION_LIMIT:
475 PyErr_SetString(
476 PyExc_RuntimeError,
477 "maximum recursion limit exceeded"
478 );
479 break;
480 case SRE_ERROR_MEMORY:
481 PyErr_NoMemory();
482 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000483 case SRE_ERROR_INTERRUPTED:
484 /* An exception has already been raised, so let it fly */
485 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000486 default:
487 /* other error codes indicate compiler/engine bugs */
488 PyErr_SetString(
489 PyExc_RuntimeError,
490 "internal error in regular expression engine"
491 );
492 }
493}
494
Guido van Rossumb700df92000-03-31 14:59:30 +0000495static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000496pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000497{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000498 if (self->weakreflist != NULL)
499 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000500 Py_XDECREF(self->pattern);
501 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000502 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000503 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000504}
505
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300506LOCAL(Py_ssize_t)
507sre_match(SRE_STATE* state, SRE_CODE* pattern)
508{
509 if (state->charsize == 1)
510 return sre_ucs1_match(state, pattern);
511 if (state->charsize == 2)
512 return sre_ucs2_match(state, pattern);
513 assert(state->charsize == 4);
514 return sre_ucs4_match(state, pattern);
515}
516
517LOCAL(Py_ssize_t)
518sre_search(SRE_STATE* state, SRE_CODE* pattern)
519{
520 if (state->charsize == 1)
521 return sre_ucs1_search(state, pattern);
522 if (state->charsize == 2)
523 return sre_ucs2_search(state, pattern);
524 assert(state->charsize == 4);
525 return sre_ucs4_search(state, pattern);
526}
527
Guido van Rossumb700df92000-03-31 14:59:30 +0000528static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000529pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000530{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000531 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100532 Py_ssize_t status;
Guido van Rossumb700df92000-03-31 14:59:30 +0000533
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000534 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000535 Py_ssize_t start = 0;
536 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000537 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000538 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000539 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000540 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000541
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000542 string = state_init(&state, self, string, start, end);
543 if (!string)
544 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000545
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000546 state.ptr = state.start;
547
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000548 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
549
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300550 status = sre_match(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000551
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000552 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Thomas Wouters89f507f2006-12-13 04:49:30 +0000553 if (PyErr_Occurred())
554 return NULL;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000555
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000556 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000557
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000558 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000559}
560
561static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000562pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000563{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000564 SRE_STATE state;
565 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +0000566
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000567 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000568 Py_ssize_t start = 0;
569 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000570 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000571 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000572 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000573 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000574
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 string = state_init(&state, self, string, start, end);
576 if (!string)
577 return NULL;
578
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000579 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
580
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300581 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000582
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000583 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
584
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000585 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000586
Thomas Wouters89f507f2006-12-13 04:49:30 +0000587 if (PyErr_Occurred())
588 return NULL;
589
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000590 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000591}
592
593static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000594call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000595{
596 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000597 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000598 PyObject* func;
599 PyObject* result;
600
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000601 if (!args)
602 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000603 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000604 if (!name)
605 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000606 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000607 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000608 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000609 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000610 func = PyObject_GetAttrString(mod, function);
611 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000612 if (!func)
613 return NULL;
614 result = PyObject_CallObject(func, args);
615 Py_DECREF(func);
616 Py_DECREF(args);
617 return result;
618}
619
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000620#ifdef USE_BUILTIN_COPY
621static int
622deepcopy(PyObject** object, PyObject* memo)
623{
624 PyObject* copy;
625
626 copy = call(
627 "copy", "deepcopy",
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000628 PyTuple_Pack(2, *object, memo)
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000629 );
630 if (!copy)
631 return 0;
632
633 Py_DECREF(*object);
634 *object = copy;
635
636 return 1; /* success */
637}
638#endif
639
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000640static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000641pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000642{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000643 SRE_STATE state;
644 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100645 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000646 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000647
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000648 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000649 Py_ssize_t start = 0;
650 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000651 static char* kwlist[] = { "source", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000652 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000653 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000654 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000655
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000656 string = state_init(&state, self, string, start, end);
657 if (!string)
658 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000659
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000660 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000661 if (!list) {
662 state_fini(&state);
663 return NULL;
664 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000665
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000666 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000667
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000668 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000669
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000670 state_reset(&state);
671
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000672 state.ptr = state.start;
673
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300674 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300675 if (PyErr_Occurred())
676 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000677
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000678 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000679 if (status == 0)
680 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000681 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000682 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000683 }
Tim Peters3d563502006-01-21 02:47:53 +0000684
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000685 /* don't bother to build a match object */
686 switch (self->groups) {
687 case 0:
688 b = STATE_OFFSET(&state, state.start);
689 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300690 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300691 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000692 if (!item)
693 goto error;
694 break;
695 case 1:
696 item = state_getslice(&state, 1, string, 1);
697 if (!item)
698 goto error;
699 break;
700 default:
701 item = PyTuple_New(self->groups);
702 if (!item)
703 goto error;
704 for (i = 0; i < self->groups; i++) {
705 PyObject* o = state_getslice(&state, i+1, string, 1);
706 if (!o) {
707 Py_DECREF(item);
708 goto error;
709 }
710 PyTuple_SET_ITEM(item, i, o);
711 }
712 break;
713 }
714
715 status = PyList_Append(list, item);
716 Py_DECREF(item);
717 if (status < 0)
718 goto error;
719
720 if (state.ptr == state.start)
721 state.start = (void*) ((char*) state.ptr + state.charsize);
722 else
723 state.start = state.ptr;
724
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000725 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000726
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 state_fini(&state);
728 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000729
730error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000731 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000732 state_fini(&state);
733 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000734
Guido van Rossumb700df92000-03-31 14:59:30 +0000735}
736
Fredrik Lundh703ce812001-10-24 22:16:30 +0000737static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600738pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
Fredrik Lundh703ce812001-10-24 22:16:30 +0000739{
740 PyObject* scanner;
741 PyObject* search;
742 PyObject* iterator;
743
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600744 scanner = pattern_scanner(pattern, args, kw);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000745 if (!scanner)
746 return NULL;
747
748 search = PyObject_GetAttrString(scanner, "search");
749 Py_DECREF(scanner);
750 if (!search)
751 return NULL;
752
753 iterator = PyCallIter_New(search, Py_None);
754 Py_DECREF(search);
755
756 return iterator;
757}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000758
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000759static PyObject*
760pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
761{
762 SRE_STATE state;
763 PyObject* list;
764 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100765 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000766 Py_ssize_t n;
767 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000768 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000769
770 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000771 Py_ssize_t maxsplit = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000772 static char* kwlist[] = { "source", "maxsplit", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000773 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000774 &string, &maxsplit))
775 return NULL;
776
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000777 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000778 if (!string)
779 return NULL;
780
781 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000782 if (!list) {
783 state_fini(&state);
784 return NULL;
785 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000786
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000787 n = 0;
788 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000789
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000790 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000791
792 state_reset(&state);
793
794 state.ptr = state.start;
795
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300796 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300797 if (PyErr_Occurred())
798 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000799
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000800 if (status <= 0) {
801 if (status == 0)
802 break;
803 pattern_error(status);
804 goto error;
805 }
Tim Peters3d563502006-01-21 02:47:53 +0000806
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000807 if (state.start == state.ptr) {
808 if (last == state.end)
809 break;
810 /* skip one character */
811 state.start = (void*) ((char*) state.ptr + state.charsize);
812 continue;
813 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000814
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000815 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300816 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000817 string, STATE_OFFSET(&state, last),
818 STATE_OFFSET(&state, state.start)
819 );
820 if (!item)
821 goto error;
822 status = PyList_Append(list, item);
823 Py_DECREF(item);
824 if (status < 0)
825 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000826
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000827 /* add groups (if any) */
828 for (i = 0; i < self->groups; i++) {
829 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000830 if (!item)
831 goto error;
832 status = PyList_Append(list, item);
833 Py_DECREF(item);
834 if (status < 0)
835 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000836 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000837
838 n = n + 1;
839
840 last = state.start = state.ptr;
841
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000842 }
843
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000844 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300845 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000846 string, STATE_OFFSET(&state, last), state.endpos
847 );
848 if (!item)
849 goto error;
850 status = PyList_Append(list, item);
851 Py_DECREF(item);
852 if (status < 0)
853 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000854
855 state_fini(&state);
856 return list;
857
858error:
859 Py_DECREF(list);
860 state_fini(&state);
861 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000862
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000863}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000864
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000865static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000866pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000867 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000868{
869 SRE_STATE state;
870 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300871 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000872 PyObject* item;
873 PyObject* filter;
874 PyObject* args;
875 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000876 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100877 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000878 Py_ssize_t n;
879 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300880 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000881 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600882 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000883
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000884 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000885 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000886 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000887 Py_INCREF(filter);
888 filter_is_callable = 1;
889 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000890 /* if not callable, check if it's a literal string */
891 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600892 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300893 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200894 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000895 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300896 if (charsize == 1)
897 literal = memchr(ptr, '\\', n) == NULL;
898 else
899 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000900 } else {
901 PyErr_Clear();
902 literal = 0;
903 }
Benjamin Petersone48944b2012-03-07 14:50:25 -0600904 if (view.buf)
905 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000906 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000907 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000908 Py_INCREF(filter);
909 filter_is_callable = 0;
910 } else {
911 /* not a literal; hand it over to the template compiler */
912 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +0000913 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000914 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000915 );
916 if (!filter)
917 return NULL;
918 filter_is_callable = PyCallable_Check(filter);
919 }
Fredrik Lundhdac58492001-10-21 21:48:30 +0000920 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000921
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000922 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +0000923 if (!string) {
924 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000925 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +0000926 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000927
928 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000929 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +0000930 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000931 state_fini(&state);
932 return NULL;
933 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000934
935 n = i = 0;
936
937 while (!count || n < count) {
938
939 state_reset(&state);
940
941 state.ptr = state.start;
942
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300943 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300944 if (PyErr_Occurred())
945 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000946
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000947 if (status <= 0) {
948 if (status == 0)
949 break;
950 pattern_error(status);
951 goto error;
952 }
Tim Peters3d563502006-01-21 02:47:53 +0000953
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000954 b = STATE_OFFSET(&state, state.start);
955 e = STATE_OFFSET(&state, state.ptr);
956
957 if (i < b) {
958 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300959 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300960 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000961 if (!item)
962 goto error;
963 status = PyList_Append(list, item);
964 Py_DECREF(item);
965 if (status < 0)
966 goto error;
967
968 } else if (i == b && i == e && n > 0)
969 /* ignore empty match on latest position */
970 goto next;
971
972 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000973 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000974 match = pattern_new_match(self, &state, 1);
975 if (!match)
976 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000977 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000978 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +0000979 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000980 goto error;
981 }
982 item = PyObject_CallObject(filter, args);
983 Py_DECREF(args);
984 Py_DECREF(match);
985 if (!item)
986 goto error;
987 } else {
988 /* filter is literal string */
989 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000990 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000991 }
992
993 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000994 if (item != Py_None) {
995 status = PyList_Append(list, item);
996 Py_DECREF(item);
997 if (status < 0)
998 goto error;
999 }
Tim Peters3d563502006-01-21 02:47:53 +00001000
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001001 i = e;
1002 n = n + 1;
1003
1004next:
1005 /* move on */
1006 if (state.ptr == state.start)
1007 state.start = (void*) ((char*) state.ptr + state.charsize);
1008 else
1009 state.start = state.ptr;
1010
1011 }
1012
1013 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001014 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001015 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001016 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001017 if (!item)
1018 goto error;
1019 status = PyList_Append(list, item);
1020 Py_DECREF(item);
1021 if (status < 0)
1022 goto error;
1023 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001024
1025 state_fini(&state);
1026
Guido van Rossum4e173842001-12-07 04:25:10 +00001027 Py_DECREF(filter);
1028
Fredrik Lundhdac58492001-10-21 21:48:30 +00001029 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001030 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001031 if (!joiner) {
1032 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001033 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001034 }
1035 if (PyList_GET_SIZE(list) == 0) {
1036 Py_DECREF(list);
1037 item = joiner;
1038 }
1039 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001040 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001041 item = _PyBytes_Join(joiner, list);
1042 else
1043 item = PyUnicode_Join(joiner, list);
1044 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001045 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001046 if (!item)
1047 return NULL;
1048 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001049
1050 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001051 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001052
1053 return item;
1054
1055error:
1056 Py_DECREF(list);
1057 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001058 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001059 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001060
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001061}
1062
1063static PyObject*
1064pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
1065{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001066 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001067 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001068 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001069 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001070 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001071 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001072 return NULL;
1073
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001074 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001075}
1076
1077static PyObject*
1078pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
1079{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001080 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001081 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001082 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001083 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001084 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001085 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001086 return NULL;
1087
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001088 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001089}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001090
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001091static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001092pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001093{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001094#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001095 PatternObject* copy;
1096 int offset;
1097
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001098 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1099 if (!copy)
1100 return NULL;
1101
1102 offset = offsetof(PatternObject, groups);
1103
1104 Py_XINCREF(self->groupindex);
1105 Py_XINCREF(self->indexgroup);
1106 Py_XINCREF(self->pattern);
1107
1108 memcpy((char*) copy + offset, (char*) self + offset,
1109 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001110 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001111
1112 return (PyObject*) copy;
1113#else
1114 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1115 return NULL;
1116#endif
1117}
1118
1119static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001120pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001121{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001122#ifdef USE_BUILTIN_COPY
1123 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001124
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001125 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001126 if (!copy)
1127 return NULL;
1128
1129 if (!deepcopy(&copy->groupindex, memo) ||
1130 !deepcopy(&copy->indexgroup, memo) ||
1131 !deepcopy(&copy->pattern, memo)) {
1132 Py_DECREF(copy);
1133 return NULL;
1134 }
1135
1136#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001137 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1138 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001139#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001140}
1141
Raymond Hettinger94478742004-09-24 04:31:19 +00001142PyDoc_STRVAR(pattern_match_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001143"match(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001144 Matches zero or more characters at the beginning of the string");
1145
1146PyDoc_STRVAR(pattern_search_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001147"search(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001148 Scan through string looking for a match, and return a corresponding\n\
Andrew Svetlov0b64c142012-12-25 18:48:54 +02001149 match object instance. Return None if no position in the string matches.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001150
1151PyDoc_STRVAR(pattern_split_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001152"split(string[, maxsplit = 0]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001153 Split string by the occurrences of pattern.");
1154
1155PyDoc_STRVAR(pattern_findall_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001156"findall(string[, pos[, endpos]]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001157 Return a list of all non-overlapping matches of pattern in string.");
1158
1159PyDoc_STRVAR(pattern_finditer_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001160"finditer(string[, pos[, endpos]]) -> iterator.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001161 Return an iterator over all non-overlapping matches for the \n\
1162 RE pattern in string. For each match, the iterator returns a\n\
1163 match object.");
1164
1165PyDoc_STRVAR(pattern_sub_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001166"sub(repl, string[, count = 0]) -> newstring.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001167 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00001168 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001169
1170PyDoc_STRVAR(pattern_subn_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001171"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001172 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
1173 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00001174 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001175
1176PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1177
/* Method table for pattern objects.  Entries that accept positional and
   keyword arguments use METH_VARARGS|METH_KEYWORDS; the last three
   entries carry no docstring. */
static PyMethodDef pattern_methods[] = {
    {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
        pattern_match_doc},
    {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
        pattern_search_doc},
    {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
        pattern_sub_doc},
    {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
        pattern_subn_doc},
    {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
        pattern_split_doc},
    {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
        pattern_findall_doc},
    {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
        pattern_finditer_doc},
    {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
    /* copy protocol hooks: __copy__ takes no argument, __deepcopy__
       takes the memo object */
    {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
    {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
    {NULL, NULL} /* sentinel */
};
1198
/* Byte offset of field x inside PatternObject */
#define PAT_OFF(x) offsetof(PatternObject, x)
/* Read-only attributes served directly from PatternObject fields */
static PyMemberDef pattern_members[] = {
    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
    {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
    {"groupindex", T_OBJECT,    PAT_OFF(groupindex),    READONLY},
    {NULL}  /* Sentinel */
};
Guido van Rossumb700df92000-03-31 14:59:30 +00001207
/* Type object for compiled patterns.  It is a variable-size type: each
   instance is sizeof(PatternObject) plus one SRE_CODE item per code
   word.  The initializer is positional, so the order of the slots below
   must not change. */
static PyTypeObject Pattern_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_" SRE_MODULE ".SRE_Pattern",      /* type name */
    sizeof(PatternObject), sizeof(SRE_CODE), /* basic size, item size */
    (destructor)pattern_dealloc,        /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    pattern_doc,                        /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    pattern_methods,                    /* tp_methods */
    pattern_members,                    /* tp_members */
};
1238
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001239static int _validate(PatternObject *self); /* Forward */
1240
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001241static PyObject *
1242_compile(PyObject* self_, PyObject* args)
1243{
1244 /* "compile" pattern descriptor to pattern object */
1245
1246 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001247 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001248
1249 PyObject* pattern;
1250 int flags = 0;
1251 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001252 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001253 PyObject* groupindex = NULL;
1254 PyObject* indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001255
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001256 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001257 &PyList_Type, &code, &groups,
1258 &groupindex, &indexgroup))
1259 return NULL;
1260
1261 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001262 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001263 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1264 if (!self)
1265 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001266 self->weakreflist = NULL;
1267 self->pattern = NULL;
1268 self->groupindex = NULL;
1269 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001270
1271 self->codesize = n;
1272
1273 for (i = 0; i < n; i++) {
1274 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001275 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001276 self->code[i] = (SRE_CODE) value;
1277 if ((unsigned long) self->code[i] != value) {
1278 PyErr_SetString(PyExc_OverflowError,
1279 "regular expression code size limit exceeded");
1280 break;
1281 }
1282 }
1283
1284 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001285 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001286 return NULL;
1287 }
1288
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001289 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001290 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001291 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001292 else {
1293 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001294 int charsize;
1295 Py_buffer view;
1296 view.buf = NULL;
1297 if (!getstring(pattern, &p_length, &self->isbytes,
1298 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001299 Py_DECREF(self);
1300 return NULL;
1301 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001302 if (view.buf)
1303 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001304 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001305
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001306 Py_INCREF(pattern);
1307 self->pattern = pattern;
1308
1309 self->flags = flags;
1310
1311 self->groups = groups;
1312
1313 Py_XINCREF(groupindex);
1314 self->groupindex = groupindex;
1315
1316 Py_XINCREF(indexgroup);
1317 self->indexgroup = indexgroup;
1318
1319 self->weakreflist = NULL;
1320
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001321 if (!_validate(self)) {
1322 Py_DECREF(self);
1323 return NULL;
1324 }
1325
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001326 return (PyObject*) self;
1327}
1328
Guido van Rossumb700df92000-03-31 14:59:30 +00001329/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001330/* Code validation */
1331
1332/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
1334 for conformance with the code patterns generated there.
1335
1336 The nice thing about the generated code is that it is position-independent:
1337 all jumps are relative jumps forward. Also, jumps don't cross each other:
1338 the target of a later jump is always earlier than the target of an earlier
1339 jump. IOW, this is okay:
1340
1341 J---------J-------T--------T
1342 \ \_____/ /
1343 \______________________/
1344
1345 but this is not:
1346
1347 J---------J-------T--------T
1348 \_________\_____/ /
1349 \____________/
1350
Serhiy Storchakaefa5a392013-10-27 08:04:58 +02001351 It also helps that SRE_CODE is always an unsigned type.
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001352*/
1353
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf() argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: trace the source line and make the enclosing
   validation function return 0. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   All of these macros expect `code` and `end` (SRE_CODE pointers) to be in
   scope, plus the local they assign (`op`, `arg` or `skip`).  Each one FAILs
   if `code` has already reached `end`. */

/* Read the next word into `op` and advance `code`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)

/* Read the next word into `arg` and advance `code`. */
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)

/* Read the next word into `skip`, FAIL if `skip - adj` words would reach
   past `end` (counted from the skip word itself), then advance `code`.
   The subtraction is unsigned, so a skip smaller than `adj` wraps to a huge
   value and is rejected as well.  `adj` is parenthesized so that an
   expression argument cannot change the meaning of the subtraction. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001394
1395static int
1396_validate_charset(SRE_CODE *code, SRE_CODE *end)
1397{
1398 /* Some variables are manipulated by the macros above */
1399 SRE_CODE op;
1400 SRE_CODE arg;
1401 SRE_CODE offset;
1402 int i;
1403
1404 while (code < end) {
1405 GET_OP;
1406 switch (op) {
1407
1408 case SRE_OP_NEGATE:
1409 break;
1410
1411 case SRE_OP_LITERAL:
1412 GET_ARG;
1413 break;
1414
1415 case SRE_OP_RANGE:
1416 GET_ARG;
1417 GET_ARG;
1418 break;
1419
1420 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001421 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001422 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001423 FAIL;
1424 code += offset;
1425 break;
1426
1427 case SRE_OP_BIGCHARSET:
1428 GET_ARG; /* Number of blocks */
1429 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001430 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001431 FAIL;
1432 /* Make sure that each byte points to a valid block */
1433 for (i = 0; i < 256; i++) {
1434 if (((unsigned char *)code)[i] >= arg)
1435 FAIL;
1436 }
1437 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001438 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001439 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001440 FAIL;
1441 code += offset;
1442 break;
1443
1444 case SRE_OP_CATEGORY:
1445 GET_ARG;
1446 switch (arg) {
1447 case SRE_CATEGORY_DIGIT:
1448 case SRE_CATEGORY_NOT_DIGIT:
1449 case SRE_CATEGORY_SPACE:
1450 case SRE_CATEGORY_NOT_SPACE:
1451 case SRE_CATEGORY_WORD:
1452 case SRE_CATEGORY_NOT_WORD:
1453 case SRE_CATEGORY_LINEBREAK:
1454 case SRE_CATEGORY_NOT_LINEBREAK:
1455 case SRE_CATEGORY_LOC_WORD:
1456 case SRE_CATEGORY_LOC_NOT_WORD:
1457 case SRE_CATEGORY_UNI_DIGIT:
1458 case SRE_CATEGORY_UNI_NOT_DIGIT:
1459 case SRE_CATEGORY_UNI_SPACE:
1460 case SRE_CATEGORY_UNI_NOT_SPACE:
1461 case SRE_CATEGORY_UNI_WORD:
1462 case SRE_CATEGORY_UNI_NOT_WORD:
1463 case SRE_CATEGORY_UNI_LINEBREAK:
1464 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1465 break;
1466 default:
1467 FAIL;
1468 }
1469 break;
1470
1471 default:
1472 FAIL;
1473
1474 }
1475 }
1476
1477 return 1;
1478}
1479
1480static int
1481_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1482{
1483 /* Some variables are manipulated by the macros above */
1484 SRE_CODE op;
1485 SRE_CODE arg;
1486 SRE_CODE skip;
1487
1488 VTRACE(("code=%p, end=%p\n", code, end));
1489
1490 if (code > end)
1491 FAIL;
1492
1493 while (code < end) {
1494 GET_OP;
1495 switch (op) {
1496
1497 case SRE_OP_MARK:
1498 /* We don't check whether marks are properly nested; the
1499 sre_match() code is robust even if they don't, and the worst
1500 you can get is nonsensical match results. */
1501 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001502 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001503 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1504 FAIL;
1505 }
1506 break;
1507
1508 case SRE_OP_LITERAL:
1509 case SRE_OP_NOT_LITERAL:
1510 case SRE_OP_LITERAL_IGNORE:
1511 case SRE_OP_NOT_LITERAL_IGNORE:
1512 GET_ARG;
1513 /* The arg is just a character, nothing to check */
1514 break;
1515
1516 case SRE_OP_SUCCESS:
1517 case SRE_OP_FAILURE:
1518 /* Nothing to check; these normally end the matching process */
1519 break;
1520
1521 case SRE_OP_AT:
1522 GET_ARG;
1523 switch (arg) {
1524 case SRE_AT_BEGINNING:
1525 case SRE_AT_BEGINNING_STRING:
1526 case SRE_AT_BEGINNING_LINE:
1527 case SRE_AT_END:
1528 case SRE_AT_END_LINE:
1529 case SRE_AT_END_STRING:
1530 case SRE_AT_BOUNDARY:
1531 case SRE_AT_NON_BOUNDARY:
1532 case SRE_AT_LOC_BOUNDARY:
1533 case SRE_AT_LOC_NON_BOUNDARY:
1534 case SRE_AT_UNI_BOUNDARY:
1535 case SRE_AT_UNI_NON_BOUNDARY:
1536 break;
1537 default:
1538 FAIL;
1539 }
1540 break;
1541
1542 case SRE_OP_ANY:
1543 case SRE_OP_ANY_ALL:
1544 /* These have no operands */
1545 break;
1546
1547 case SRE_OP_IN:
1548 case SRE_OP_IN_IGNORE:
1549 GET_SKIP;
1550 /* Stop 1 before the end; we check the FAILURE below */
1551 if (!_validate_charset(code, code+skip-2))
1552 FAIL;
1553 if (code[skip-2] != SRE_OP_FAILURE)
1554 FAIL;
1555 code += skip-1;
1556 break;
1557
1558 case SRE_OP_INFO:
1559 {
1560 /* A minimal info field is
1561 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1562 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1563 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001564 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001565 SRE_CODE *newcode;
1566 GET_SKIP;
1567 newcode = code+skip-1;
1568 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001569 GET_ARG;
1570 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001571 /* Check that only valid flags are present */
1572 if ((flags & ~(SRE_INFO_PREFIX |
1573 SRE_INFO_LITERAL |
1574 SRE_INFO_CHARSET)) != 0)
1575 FAIL;
1576 /* PREFIX and CHARSET are mutually exclusive */
1577 if ((flags & SRE_INFO_PREFIX) &&
1578 (flags & SRE_INFO_CHARSET))
1579 FAIL;
1580 /* LITERAL implies PREFIX */
1581 if ((flags & SRE_INFO_LITERAL) &&
1582 !(flags & SRE_INFO_PREFIX))
1583 FAIL;
1584 /* Validate the prefix */
1585 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001586 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001587 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001588 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001589 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001590 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001591 FAIL;
1592 code += prefix_len;
1593 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001594 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001595 FAIL;
1596 /* Each overlap value should be < prefix_len */
1597 for (i = 0; i < prefix_len; i++) {
1598 if (code[i] >= prefix_len)
1599 FAIL;
1600 }
1601 code += prefix_len;
1602 }
1603 /* Validate the charset */
1604 if (flags & SRE_INFO_CHARSET) {
1605 if (!_validate_charset(code, newcode-1))
1606 FAIL;
1607 if (newcode[-1] != SRE_OP_FAILURE)
1608 FAIL;
1609 code = newcode;
1610 }
1611 else if (code != newcode) {
1612 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1613 FAIL;
1614 }
1615 }
1616 break;
1617
1618 case SRE_OP_BRANCH:
1619 {
1620 SRE_CODE *target = NULL;
1621 for (;;) {
1622 GET_SKIP;
1623 if (skip == 0)
1624 break;
1625 /* Stop 2 before the end; we check the JUMP below */
1626 if (!_validate_inner(code, code+skip-3, groups))
1627 FAIL;
1628 code += skip-3;
1629 /* Check that it ends with a JUMP, and that each JUMP
1630 has the same target */
1631 GET_OP;
1632 if (op != SRE_OP_JUMP)
1633 FAIL;
1634 GET_SKIP;
1635 if (target == NULL)
1636 target = code+skip-1;
1637 else if (code+skip-1 != target)
1638 FAIL;
1639 }
1640 }
1641 break;
1642
1643 case SRE_OP_REPEAT_ONE:
1644 case SRE_OP_MIN_REPEAT_ONE:
1645 {
1646 SRE_CODE min, max;
1647 GET_SKIP;
1648 GET_ARG; min = arg;
1649 GET_ARG; max = arg;
1650 if (min > max)
1651 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001652 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001653 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001654 if (!_validate_inner(code, code+skip-4, groups))
1655 FAIL;
1656 code += skip-4;
1657 GET_OP;
1658 if (op != SRE_OP_SUCCESS)
1659 FAIL;
1660 }
1661 break;
1662
1663 case SRE_OP_REPEAT:
1664 {
1665 SRE_CODE min, max;
1666 GET_SKIP;
1667 GET_ARG; min = arg;
1668 GET_ARG; max = arg;
1669 if (min > max)
1670 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001671 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001672 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001673 if (!_validate_inner(code, code+skip-3, groups))
1674 FAIL;
1675 code += skip-3;
1676 GET_OP;
1677 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1678 FAIL;
1679 }
1680 break;
1681
1682 case SRE_OP_GROUPREF:
1683 case SRE_OP_GROUPREF_IGNORE:
1684 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001685 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001686 FAIL;
1687 break;
1688
1689 case SRE_OP_GROUPREF_EXISTS:
1690 /* The regex syntax for this is: '(?(group)then|else)', where
1691 'group' is either an integer group number or a group name,
1692 'then' and 'else' are sub-regexes, and 'else' is optional. */
1693 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001694 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001695 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001696 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001697 code--; /* The skip is relative to the first arg! */
1698 /* There are two possibilities here: if there is both a 'then'
1699 part and an 'else' part, the generated code looks like:
1700
1701 GROUPREF_EXISTS
1702 <group>
1703 <skipyes>
1704 ...then part...
1705 JUMP
1706 <skipno>
1707 (<skipyes> jumps here)
1708 ...else part...
1709 (<skipno> jumps here)
1710
1711 If there is only a 'then' part, it looks like:
1712
1713 GROUPREF_EXISTS
1714 <group>
1715 <skip>
1716 ...then part...
1717 (<skip> jumps here)
1718
1719 There is no direct way to decide which it is, and we don't want
1720 to allow arbitrary jumps anywhere in the code; so we just look
1721 for a JUMP opcode preceding our skip target.
1722 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001723 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001724 code[skip-3] == SRE_OP_JUMP)
1725 {
1726 VTRACE(("both then and else parts present\n"));
1727 if (!_validate_inner(code+1, code+skip-3, groups))
1728 FAIL;
1729 code += skip-2; /* Position after JUMP, at <skipno> */
1730 GET_SKIP;
1731 if (!_validate_inner(code, code+skip-1, groups))
1732 FAIL;
1733 code += skip-1;
1734 }
1735 else {
1736 VTRACE(("only a then part present\n"));
1737 if (!_validate_inner(code+1, code+skip-1, groups))
1738 FAIL;
1739 code += skip-1;
1740 }
1741 break;
1742
1743 case SRE_OP_ASSERT:
1744 case SRE_OP_ASSERT_NOT:
1745 GET_SKIP;
1746 GET_ARG; /* 0 for lookahead, width for lookbehind */
1747 code--; /* Back up over arg to simplify math below */
1748 if (arg & 0x80000000)
1749 FAIL; /* Width too large */
1750 /* Stop 1 before the end; we check the SUCCESS below */
1751 if (!_validate_inner(code+1, code+skip-2, groups))
1752 FAIL;
1753 code += skip-2;
1754 GET_OP;
1755 if (op != SRE_OP_SUCCESS)
1756 FAIL;
1757 break;
1758
1759 default:
1760 FAIL;
1761
1762 }
1763 }
1764
1765 VTRACE(("okay\n"));
1766 return 1;
1767}
1768
1769static int
1770_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1771{
1772 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
1773 FAIL;
1774 if (groups == 0) /* fix for simplejson */
1775 groups = 100; /* 100 groups should always be safe */
1776 return _validate_inner(code, end-1, groups);
1777}
1778
1779static int
1780_validate(PatternObject *self)
1781{
1782 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1783 {
1784 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1785 return 0;
1786 }
1787 else
1788 VTRACE(("Success!\n"));
1789 return 1;
1790}
1791
1792/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001793/* match methods */
1794
1795static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001796match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001797{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001798 Py_XDECREF(self->regs);
1799 Py_XDECREF(self->string);
1800 Py_DECREF(self->pattern);
1801 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001802}
1803
1804static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001805match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001806{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001807 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001808 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001809 Py_buffer view;
1810 PyObject *result;
1811 void* ptr;
1812
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001813 if (index < 0 || index >= self->groups) {
1814 /* raise IndexError if we were given a bad group number */
1815 PyErr_SetString(
1816 PyExc_IndexError,
1817 "no such group"
1818 );
1819 return NULL;
1820 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001821
Fredrik Lundh6f013982000-07-03 18:44:21 +00001822 index *= 2;
1823
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001824 if (self->string == Py_None || self->mark[index] < 0) {
1825 /* return default value if the string or group is undefined */
1826 Py_INCREF(def);
1827 return def;
1828 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001829
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001830 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001831 if (ptr == NULL)
1832 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001833 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001834 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001835 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001836 PyBuffer_Release(&view);
1837 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001838}
1839
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001840static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001841match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001842{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001843 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001844
Guido van Rossumddefaf32007-01-14 03:31:43 +00001845 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001846 /* Default value */
1847 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001848
Christian Heimes217cfd12007-12-02 14:31:20 +00001849 if (PyLong_Check(index))
1850 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001851
Fredrik Lundh6f013982000-07-03 18:44:21 +00001852 i = -1;
1853
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001854 if (self->pattern->groupindex) {
1855 index = PyObject_GetItem(self->pattern->groupindex, index);
1856 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00001857 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00001858 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001859 Py_DECREF(index);
1860 } else
1861 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001862 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001863
1864 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001865}
1866
1867static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001868match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001869{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001870 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001871}
1872
1873static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001874match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001875{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001876 /* delegate to Python code */
1877 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001878 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001879 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001880 );
1881}
1882
1883static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001884match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001885{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001886 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001887 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001888
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001889 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001890
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001891 switch (size) {
1892 case 0:
1893 result = match_getslice(self, Py_False, Py_None);
1894 break;
1895 case 1:
1896 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1897 break;
1898 default:
1899 /* fetch multiple items */
1900 result = PyTuple_New(size);
1901 if (!result)
1902 return NULL;
1903 for (i = 0; i < size; i++) {
1904 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001905 self, PyTuple_GET_ITEM(args, i), Py_None
1906 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001907 if (!item) {
1908 Py_DECREF(result);
1909 return NULL;
1910 }
1911 PyTuple_SET_ITEM(result, i, item);
1912 }
1913 break;
1914 }
1915 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001916}
1917
1918static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001919match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001920{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001921 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001922 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001923
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001924 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001925 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00001926 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001927 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001928
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001929 result = PyTuple_New(self->groups-1);
1930 if (!result)
1931 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001932
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001933 for (index = 1; index < self->groups; index++) {
1934 PyObject* item;
1935 item = match_getslice_by_index(self, index, def);
1936 if (!item) {
1937 Py_DECREF(result);
1938 return NULL;
1939 }
1940 PyTuple_SET_ITEM(result, index-1, item);
1941 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001942
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001943 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001944}
1945
1946static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001947match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001948{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001949 PyObject* result;
1950 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001951 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001952
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001953 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001954 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00001955 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001956 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001957
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001958 result = PyDict_New();
1959 if (!result || !self->pattern->groupindex)
1960 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001961
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001962 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00001963 if (!keys)
1964 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00001965
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001966 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00001967 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001968 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001969 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001970 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00001971 if (!key)
1972 goto failed;
1973 value = match_getslice(self, key, def);
1974 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001975 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00001976 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001977 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00001978 status = PyDict_SetItem(result, key, value);
1979 Py_DECREF(value);
1980 if (status < 0)
1981 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001982 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001983
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001984 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00001985
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001986 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001987
1988failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00001989 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00001990 Py_DECREF(result);
1991 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001992}
1993
1994static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001995match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001996{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001997 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001998
Guido van Rossumddefaf32007-01-14 03:31:43 +00001999 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002000 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002001 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002002
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002003 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002004
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002005 if (index < 0 || index >= self->groups) {
2006 PyErr_SetString(
2007 PyExc_IndexError,
2008 "no such group"
2009 );
2010 return NULL;
2011 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002012
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002013 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002014 return PyLong_FromSsize_t(self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002015}
2016
2017static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002018match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002019{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002020 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002021
Guido van Rossumddefaf32007-01-14 03:31:43 +00002022 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002023 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002024 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002025
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002026 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002027
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002028 if (index < 0 || index >= self->groups) {
2029 PyErr_SetString(
2030 PyExc_IndexError,
2031 "no such group"
2032 );
2033 return NULL;
2034 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002035
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002036 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002037 return PyLong_FromSsize_t(self->mark[index*2+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002038}
2039
2040LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002041_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002042{
2043 PyObject* pair;
2044 PyObject* item;
2045
2046 pair = PyTuple_New(2);
2047 if (!pair)
2048 return NULL;
2049
Christian Heimes217cfd12007-12-02 14:31:20 +00002050 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002051 if (!item)
2052 goto error;
2053 PyTuple_SET_ITEM(pair, 0, item);
2054
Christian Heimes217cfd12007-12-02 14:31:20 +00002055 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002056 if (!item)
2057 goto error;
2058 PyTuple_SET_ITEM(pair, 1, item);
2059
2060 return pair;
2061
2062 error:
2063 Py_DECREF(pair);
2064 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002065}
2066
2067static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002068match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002069{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002070 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002071
Guido van Rossumddefaf32007-01-14 03:31:43 +00002072 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002073 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002074 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002075
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002076 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002077
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002078 if (index < 0 || index >= self->groups) {
2079 PyErr_SetString(
2080 PyExc_IndexError,
2081 "no such group"
2082 );
2083 return NULL;
2084 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002085
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002086 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002087 return _pair(self->mark[index*2], self->mark[index*2+1]);
2088}
2089
2090static PyObject*
2091match_regs(MatchObject* self)
2092{
2093 PyObject* regs;
2094 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002095 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002096
2097 regs = PyTuple_New(self->groups);
2098 if (!regs)
2099 return NULL;
2100
2101 for (index = 0; index < self->groups; index++) {
2102 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2103 if (!item) {
2104 Py_DECREF(regs);
2105 return NULL;
2106 }
2107 PyTuple_SET_ITEM(regs, index, item);
2108 }
2109
2110 Py_INCREF(regs);
2111 self->regs = regs;
2112
2113 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002114}
2115
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002116static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002117match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002118{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002119#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002120 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002121 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002122
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002123 slots = 2 * (self->pattern->groups+1);
2124
2125 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2126 if (!copy)
2127 return NULL;
2128
2129 /* this value a constant, but any compiler should be able to
2130 figure that out all by itself */
2131 offset = offsetof(MatchObject, string);
2132
2133 Py_XINCREF(self->pattern);
2134 Py_XINCREF(self->string);
2135 Py_XINCREF(self->regs);
2136
2137 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002138 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002139
2140 return (PyObject*) copy;
2141#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002142 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002143 return NULL;
2144#endif
2145}
2146
2147static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002148match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002149{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002150#ifdef USE_BUILTIN_COPY
2151 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002152
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002153 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002154 if (!copy)
2155 return NULL;
2156
2157 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2158 !deepcopy(&copy->string, memo) ||
2159 !deepcopy(&copy->regs, memo)) {
2160 Py_DECREF(copy);
2161 return NULL;
2162 }
2163
2164#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002165 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2166 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002167#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002168}
2169
/* Docstrings: match_doc plus one string per documented match method
   (group, start, end, span, groups, groupdict, expand); the per-method
   strings are referenced from the match_methods table. */
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002170PyDoc_STRVAR(match_doc,
2171"The result of re.match() and re.search().\n\
2172Match objects always have a boolean value of True.");
2173
2174PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002175"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002176 Return subgroup(s) of the match by indices or names.\n\
2177 For 0 returns the entire match.");
2178
2179PyDoc_STRVAR(match_start_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002180"start([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002181 Return index of the start of the substring matched by group.");
2182
2183PyDoc_STRVAR(match_end_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002184"end([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002185 Return index of the end of the substring matched by group.");
2186
2187PyDoc_STRVAR(match_span_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002188"span([group]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002189 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
2190
2191PyDoc_STRVAR(match_groups_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002192"groups([default=None]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002193 Return a tuple containing all the subgroups of the match, from 1.\n\
2194 The default argument is used for groups\n\
2195 that did not participate in the match");
2196
2197PyDoc_STRVAR(match_groupdict_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002198"groupdict([default=None]) -> dict.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002199 Return a dictionary containing all the named subgroups of the match,\n\
2200 keyed by the subgroup name. The default argument is used for groups\n\
2201 that did not participate in the match");
2202
2203PyDoc_STRVAR(match_expand_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002204"expand(template) -> str.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002205 Return the string obtained by doing backslash substitution\n\
2206 on the string template, as done by the sub() method.");
2207
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002208static PyMethodDef match_methods[] = {
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002209 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2210 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
2211 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
2212 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
2213 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
2214 match_groups_doc},
2215 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
2216 match_groupdict_doc},
2217 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002218 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
2219 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002220 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002221};
2222
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002223static PyObject *
2224match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002225{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002226 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002227 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002228 Py_INCREF(Py_None);
2229 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002230}
2231
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002232static PyObject *
2233match_lastgroup_get(MatchObject *self)
2234{
2235 if (self->pattern->indexgroup && self->lastindex >= 0) {
2236 PyObject* result = PySequence_GetItem(
2237 self->pattern->indexgroup, self->lastindex
2238 );
2239 if (result)
2240 return result;
2241 PyErr_Clear();
2242 }
2243 Py_INCREF(Py_None);
2244 return Py_None;
2245}
2246
2247static PyObject *
2248match_regs_get(MatchObject *self)
2249{
2250 if (self->regs) {
2251 Py_INCREF(self->regs);
2252 return self->regs;
2253 } else
2254 return match_regs(self);
2255}
2256
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002257static PyObject *
2258match_repr(MatchObject *self)
2259{
2260 PyObject *result;
2261 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2262 if (group0 == NULL)
2263 return NULL;
2264 result = PyUnicode_FromFormat(
2265 "<%s object; span=(%d, %d), match=%.50R>",
2266 Py_TYPE(self)->tp_name,
2267 self->mark[0], self->mark[1], group0);
2268 Py_DECREF(group0);
2269 return result;
2270}
2271
2272
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002273static PyGetSetDef match_getset[] = {
2274 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2275 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2276 {"regs", (getter)match_regs_get, (setter)NULL},
2277 {NULL}
2278};
2279
2280#define MATCH_OFF(x) offsetof(MatchObject, x)
2281static PyMemberDef match_members[] = {
2282 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2283 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2284 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2285 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2286 {NULL}
2287};
2288
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2291
Neal Norwitz57c179c2006-03-22 07:18:02 +00002292static PyTypeObject Match_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002293 PyVarObject_HEAD_INIT(NULL,0)
2294 "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002295 sizeof(MatchObject), sizeof(Py_ssize_t),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002296 (destructor)match_dealloc, /* tp_dealloc */
2297 0, /* tp_print */
2298 0, /* tp_getattr */
2299 0, /* tp_setattr */
2300 0, /* tp_reserved */
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002301 (reprfunc)match_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002302 0, /* tp_as_number */
2303 0, /* tp_as_sequence */
2304 0, /* tp_as_mapping */
2305 0, /* tp_hash */
2306 0, /* tp_call */
2307 0, /* tp_str */
2308 0, /* tp_getattro */
2309 0, /* tp_setattro */
2310 0, /* tp_as_buffer */
2311 Py_TPFLAGS_DEFAULT, /* tp_flags */
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002312 match_doc, /* tp_doc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002313 0, /* tp_traverse */
2314 0, /* tp_clear */
2315 0, /* tp_richcompare */
2316 0, /* tp_weaklistoffset */
2317 0, /* tp_iter */
2318 0, /* tp_iternext */
2319 match_methods, /* tp_methods */
2320 match_members, /* tp_members */
2321 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00002322};
2323
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002324static PyObject*
2325pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
2326{
2327 /* create match object (from state object) */
2328
2329 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002330 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002331 char* base;
2332 int n;
2333
2334 if (status > 0) {
2335
2336 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002337 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002338 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2339 2*(pattern->groups+1));
2340 if (!match)
2341 return NULL;
2342
2343 Py_INCREF(pattern);
2344 match->pattern = pattern;
2345
2346 Py_INCREF(state->string);
2347 match->string = state->string;
2348
2349 match->regs = NULL;
2350 match->groups = pattern->groups+1;
2351
2352 /* fill in group slices */
2353
2354 base = (char*) state->beginning;
2355 n = state->charsize;
2356
2357 match->mark[0] = ((char*) state->start - base) / n;
2358 match->mark[1] = ((char*) state->ptr - base) / n;
2359
2360 for (i = j = 0; i < pattern->groups; i++, j+=2)
2361 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2362 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2363 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2364 } else
2365 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2366
2367 match->pos = state->pos;
2368 match->endpos = state->endpos;
2369
2370 match->lastindex = state->lastindex;
2371
2372 return (PyObject*) match;
2373
2374 } else if (status == 0) {
2375
2376 /* no match */
2377 Py_INCREF(Py_None);
2378 return Py_None;
2379
2380 }
2381
2382 /* internal error */
2383 pattern_error(status);
2384 return NULL;
2385}
2386
2387
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002388/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002389/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002390
2391static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002392scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002393{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002394 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002395 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002396 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002397}
2398
2399static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002400scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002401{
2402 SRE_STATE* state = &self->state;
2403 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002404 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002405
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002406 state_reset(state);
2407
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002408 state->ptr = state->start;
2409
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002410 status = sre_match(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002411 if (PyErr_Occurred())
2412 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002413
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002414 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002415 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002416
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002417 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002418 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002419 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002420 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002421
2422 return match;
2423}
2424
2425
2426static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002427scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002428{
2429 SRE_STATE* state = &self->state;
2430 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002431 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002432
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002433 state_reset(state);
2434
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002435 state->ptr = state->start;
2436
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002437 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002438 if (PyErr_Occurred())
2439 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002440
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002441 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002442 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002443
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002444 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002445 state->start = (void*) ((char*) state->ptr + state->charsize);
2446 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002447 state->start = state->ptr;
2448
2449 return match;
2450}
2451
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002452static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002453 {"match", (PyCFunction) scanner_match, METH_NOARGS},
2454 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002455 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002456};
2457
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002458#define SCAN_OFF(x) offsetof(ScannerObject, x)
2459static PyMemberDef scanner_members[] = {
Ezio Melotti7c8c1ea2011-09-29 01:00:19 +03002460 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002461 {NULL} /* Sentinel */
2462};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002463
Neal Norwitz57c179c2006-03-22 07:18:02 +00002464static PyTypeObject Scanner_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002465 PyVarObject_HEAD_INIT(NULL, 0)
2466 "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002467 sizeof(ScannerObject), 0,
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002468 (destructor)scanner_dealloc,/* tp_dealloc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002469 0, /* tp_print */
2470 0, /* tp_getattr */
2471 0, /* tp_setattr */
2472 0, /* tp_reserved */
2473 0, /* tp_repr */
2474 0, /* tp_as_number */
2475 0, /* tp_as_sequence */
2476 0, /* tp_as_mapping */
2477 0, /* tp_hash */
2478 0, /* tp_call */
2479 0, /* tp_str */
2480 0, /* tp_getattro */
2481 0, /* tp_setattro */
2482 0, /* tp_as_buffer */
2483 Py_TPFLAGS_DEFAULT, /* tp_flags */
2484 0, /* tp_doc */
2485 0, /* tp_traverse */
2486 0, /* tp_clear */
2487 0, /* tp_richcompare */
2488 0, /* tp_weaklistoffset */
2489 0, /* tp_iter */
2490 0, /* tp_iternext */
2491 scanner_methods, /* tp_methods */
2492 scanner_members, /* tp_members */
2493 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002494};
2495
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002496static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002497pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kw)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002498{
2499 /* create search state object */
2500
2501 ScannerObject* self;
2502
2503 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002504 Py_ssize_t start = 0;
2505 Py_ssize_t end = PY_SSIZE_T_MAX;
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002506 static char* kwlist[] = { "source", "pos", "endpos", NULL };
2507 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:scanner", kwlist,
2508 &string, &start, &end))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002509 return NULL;
2510
2511 /* create scanner object */
2512 self = PyObject_NEW(ScannerObject, &Scanner_Type);
2513 if (!self)
2514 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002515 self->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002516
2517 string = state_init(&self->state, pattern, string, start, end);
2518 if (!string) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002519 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002520 return NULL;
2521 }
2522
2523 Py_INCREF(pattern);
2524 self->pattern = (PyObject*) pattern;
2525
2526 return (PyObject*) self;
2527}
2528
Guido van Rossumb700df92000-03-31 14:59:30 +00002529static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002530 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002531 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00002532 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002533 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002534};
2535
Martin v. Löwis1a214512008-06-11 05:26:20 +00002536static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002537 PyModuleDef_HEAD_INIT,
2538 "_" SRE_MODULE,
2539 NULL,
2540 -1,
2541 _functions,
2542 NULL,
2543 NULL,
2544 NULL,
2545 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002546};
2547
2548PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002549{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002550 PyObject* m;
2551 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002552 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002553
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002554 /* Patch object types */
2555 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2556 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002557 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002558
Martin v. Löwis1a214512008-06-11 05:26:20 +00002559 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002560 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002561 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002562 d = PyModule_GetDict(m);
2563
Christian Heimes217cfd12007-12-02 14:31:20 +00002564 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002565 if (x) {
2566 PyDict_SetItemString(d, "MAGIC", x);
2567 Py_DECREF(x);
2568 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002569
Christian Heimes217cfd12007-12-02 14:31:20 +00002570 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002571 if (x) {
2572 PyDict_SetItemString(d, "CODESIZE", x);
2573 Py_DECREF(x);
2574 }
2575
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002576 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2577 if (x) {
2578 PyDict_SetItemString(d, "MAXREPEAT", x);
2579 Py_DECREF(x);
2580 }
2581
Neal Norwitzfe537132007-08-26 03:55:15 +00002582 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002583 if (x) {
2584 PyDict_SetItemString(d, "copyright", x);
2585 Py_DECREF(x);
2586 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002587 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002588}
2589
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002590/* vim:ts=4:sw=4:et
2591*/