Issue #1160: Fix compiling large regular expressions on UCS2 builds.
Patch by Serhiy Storchaka.
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 0bceaa2..ff2c953 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -425,6 +425,12 @@
self.assertEqual(re.match(u"([\u2222\u2223])",
u"\u2222", re.UNICODE).group(1), u"\u2222")
+ def test_big_codesize(self):
+ # Issue #1160: a 10000-branch alternation must compile and still match.
+ r = re.compile('|'.join(('%d'%x for x in range(10000))))
+ self.assertIsNotNone(r.match('1000'))
+ self.assertIsNotNone(r.match('9999'))
+
def test_anyall(self):
self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
"a\nb")
diff --git a/Misc/NEWS b/Misc/NEWS
index b5b7a50..f69dd3c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -151,6 +151,9 @@
Library
-------
+- Issue #1160: Fix compiling large regular expressions on UCS2 builds.
+ Patch by Serhiy Storchaka.
+
- Issue #14313: zipfile now raises NotImplementedError when the compression
type is unknown.
diff --git a/Modules/_sre.c b/Modules/_sre.c
index cd95917..ab4f269 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -2675,6 +2675,13 @@
PyObject *o = PyList_GET_ITEM(code, i);
unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
: PyLong_AsUnsignedLong(o);
+ if (value == (unsigned long)-1 && PyErr_Occurred()) {
+ if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "regular expression code size limit exceeded");
+ }
+ break;
+ }
self->code[i] = (SRE_CODE) value;
if ((unsigned long) self->code[i] != value) {
PyErr_SetString(PyExc_OverflowError,
@@ -3035,10 +3042,8 @@
GET_ARG; max = arg;
if (min > max)
FAIL;
-#ifdef Py_UNICODE_WIDE
if (max > 65535)
FAIL;
-#endif
if (!_validate_inner(code, code+skip-4, groups))
FAIL;
code += skip-4;
@@ -3056,10 +3061,8 @@
GET_ARG; max = arg;
if (min > max)
FAIL;
-#ifdef Py_UNICODE_WIDE
if (max > 65535)
FAIL;
-#endif
if (!_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
diff --git a/Modules/sre.h b/Modules/sre.h
index d4af05c..9bfdf7f 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -14,12 +14,8 @@
#include "sre_constants.h"
/* size of a code word (must be unsigned short or larger, and
- large enough to hold a Py_UNICODE character) */
-#ifdef Py_UNICODE_WIDE
+ large enough to hold a UCS4 character) */
#define SRE_CODE Py_UCS4
-#else
-#define SRE_CODE unsigned short
-#endif
typedef struct {
PyObject_VAR_HEAD