Merged revisions 78729 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r78729 | ezio.melotti | 2010-03-06 17:24:08 +0200 (Sat, 06 Mar 2010) | 1 line

  #6509: fix re.sub to work properly when the pattern, the string, and the replacement were all bytes. Patch by Antoine Pitrou.
........
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index bc71b58..13737ca 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -786,12 +786,18 @@
     groups = []
     groupsappend = groups.append
     literals = [None] * len(p)
+    if isinstance(source, str):
+        encode = lambda x: x
+    else:
+        # The tokenizer implicitly decodes bytes objects as latin-1, so we must
+        # re-encode the final representation.
+        encode = lambda x: x.encode('latin1')
     for c, s in p:
         if c is MARK:
             groupsappend((i, s))
             # literal[i] is already None
         else:
-            literals[i] = s
+            literals[i] = encode(s)
         i = i + 1
     return groups, literals
 
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 99cc47b..44b5dfe 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -696,6 +696,24 @@
         self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
         self.assertRaises(ValueError, re.compile, '(?au)\w')
 
+    def test_bug_6509(self):
+        # Both str and bytes replacement strings must parse properly.
+        # all strings
+        pat = re.compile('a(\w)')
+        self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
+        pat = re.compile('a(.)')
+        self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
+        pat = re.compile('..')
+        self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
+
+        # all bytes
+        pat = re.compile(b'a(\w)')
+        self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
+        pat = re.compile(b'a(.)')
+        self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
+        pat = re.compile(b'..')
+        self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
+
     def test_dealloc(self):
         # issue 3299: check for segfault in debug build
         import _sre