Patch # 1140 (my code, approved by Effbot).
Make sure the type of the return value of re.sub(x, y, z) follows the
types of y and z (unicode if z is unicode or if a unicode y is substituted
into it, str otherwise; the type of the pattern x does not matter)
even if there are no substitutions or if x==z (which triggered various
special cases in join_list()).
Could be backported to 2.5; no need to port to 3.0.
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index cfb949c..aa403ba 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -83,6 +83,31 @@
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
'abc\ndef\n')
+ def test_bug_1140(self):
+ # Empty subject: re.sub(x, y, u'') should return u'', not '',
+ # and re.sub(x, y, '') should return '', not u''.
+ # Whole subject replaced (the subject is the pattern text x):
+ # re.sub(x, y, unicode(x)) should return unicode(y), and
+ # re.sub(x, y, str(x)) should return
+ # str(y) if isinstance(y, str) else unicode(y).
+ for x in 'x', u'x':
+ for y in 'y', u'y':
+ z = re.sub(x, y, u'')
+ self.assertEqual(z, u'')
+ self.assertEqual(type(z), unicode)
+ # Empty str subject: nothing is substituted, result stays str.
+ z = re.sub(x, y, '')
+ self.assertEqual(z, '')
+ self.assertEqual(type(z), str)
+ # Unicode subject replaced entirely: result is always unicode.
+ z = re.sub(x, y, unicode(x))
+ self.assertEqual(z, y)
+ self.assertEqual(type(z), unicode)
+ # Str subject replaced entirely: result takes the type of y.
+ z = re.sub(x, y, str(x))
+ self.assertEqual(z, y)
+ self.assertEqual(type(z), type(y))
+
def test_sub_template_numeric_escape(self):
# bug 776311 and friends
self.assertEqual(re.sub('x', r'\0', 'x'), '\0')