Issue #22437: The number of capturing groups in a regular expression is no
longer limited to 100.
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index c6860b5..d4d129b 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -470,12 +470,6 @@
# print code
- # XXX: <fl> get rid of this limitation!
- if p.pattern.groups > 100:
- raise AssertionError(
- "sorry, but this version only supports 100 named groups"
- )
-
# map in either direction
groupindex = p.pattern.groupdict
indexgroup = [None] * p.pattern.groups
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index 23e3516..8815d1d 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -15,7 +15,7 @@
MAGIC = 20031017
-from _sre import MAXREPEAT
+from _sre import MAXREPEAT, MAXGROUPS
# SRE standard exception (access as sre.error)
# should this really be here?
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 7fd145b..b9a1852 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -72,6 +72,8 @@
def opengroup(self, name=None):
gid = self.groups
self.groups = gid + 1
+ if self.groups > MAXGROUPS:
+ raise error("groups number is too large")
if name is not None:
ogid = self.groupdict.get(name, None)
if ogid is not None:
@@ -695,8 +697,14 @@
else:
try:
condgroup = int(condname)
+ if condgroup < 0:
+ raise ValueError
except ValueError:
raise error("bad character in group name")
+ if not condgroup:
+ raise error("bad group number")
+ if condgroup >= MAXGROUPS:
+ raise error("the group number is too large")
else:
# flags
if not source.next in FLAGS:
@@ -822,6 +830,8 @@
index = int(name)
if index < 0:
raise error("negative group number")
+ if index >= MAXGROUPS:
+ raise error("the group number is too large")
except ValueError:
if not name.isidentifier():
raise error("bad character in group name")
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index d85b767..e5ad6cb 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -193,6 +193,7 @@
def test_symbolic_groups(self):
re.compile('(?P<a>x)(?P=a)(?(a)y)')
re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
+ re.compile('(?P<a1>x)\1(?(1)y)')
self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
self.assertRaises(re.error, re.compile, '(?Px)')
self.assertRaises(re.error, re.compile, '(?P=)')
@@ -212,6 +213,10 @@
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
self.assertRaises(re.error, re.compile, '(?P<©>x)')
+ # Support > 100 groups.
+ pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
+ pat = '(?:%s)(?(200)z|t)' % pat
+ self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
def test_symbolic_refs(self):
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
@@ -228,6 +233,9 @@
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
+ # Support > 100 groups.
+ pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
+ self.assertEqual(re.sub(pat, '\g<200>', 'xc8yzxc8y'), 'c8zc8')
def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
@@ -404,6 +412,10 @@
self.assertIsNone(p.match('abd'))
self.assertIsNone(p.match('ac'))
+ # Support > 100 groups.
+ pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
+ pat = '(?:%s)(?(200)z)' % pat
+ self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
def test_re_groupref(self):
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
@@ -1070,8 +1082,10 @@
# a RuntimeError is raised instead of OverflowError.
long_overflow = 2**128
self.assertRaises(TypeError, re.finditer, "a", {})
- self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
- self.assertRaises(TypeError, _sre.compile, {}, 0, [])
+ with self.assertRaises(OverflowError):
+ _sre.compile("abc", 0, [long_overflow], 0, [], [])
+ with self.assertRaises(TypeError):
+ _sre.compile({}, 0, [], 0, [], [])
def test_search_dot_unicode(self):
self.assertTrue(re.search("123.*-", '123abc-'))