Applied patch #725106, by Greg Chapman, fixing capturing groups
within repeats of alternatives. The only change to the original
patch was to convert the tests to the new test_re.py file.
This patch fixes cases like:
>>> re.match('((a)|b)*', 'abc').groups()
('b', '')
This result is wrong (group 2, `(a)`, can never match the empty string)
and is incompatible with other regex systems, as the following
examples show:
% perl -e '"abc" =~ /^((a)|b)*/; print "$1 $2\n";'
b a
% echo "abc" | sed -r -e "s/^((a)|b)*/\1 \2|/"
b a|c
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 2430790..7ba9a1b 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -276,6 +276,25 @@
self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
('a:', 'a'))
+ def test_bug_725106(self):
+ # capturing groups in alternatives in repeats
+ self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
+ ('b', 'a'))
+ self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
+ ('c', 'b'))
+ self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
+ ('b', None))
+ self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
+ ('b', None))
+ self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
+ ('b', 'a'))
+ self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
+ ('c', 'b'))
+ self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
+ ('b', None))
+ self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
+ ('b', None))
+
def test_finditer(self):
iter = re.finditer(r":+", "a:b::c:::d")
self.assertEqual([item.group(0) for item in iter],
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 3f17d13..b9e1827 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -947,10 +947,20 @@
if (pattern[1] == SRE_OP_IN &&
(ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
continue;
+ if (state->repeat) {
+ i = mark_save(state, 0, lastmark);
+ if (i < 0)
+ return i;
+ }
state->ptr = ptr;
i = SRE_MATCH(state, pattern + 1, level + 1);
if (i)
return i;
+ if (state->repeat) {
+ i = mark_restore(state, 0, lastmark);
+ if (i < 0)
+ return i;
+ }
LASTMARK_RESTORE();
}
return 0;