Issue #12016: Multibyte CJK decoders now resynchronize faster

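On a decoding error, they now skip only the first byte of an invalid byte
sequence instead of the whole sequence, so valid bytes that follow it are no
longer swallowed.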

For example, b'\xff\n'.decode('gb2312', 'replace') now gives '\ufffd\n' instead
of the previous '\ufffd'.
diff --git a/Lib/test/test_codecencodings_cn.py b/Lib/test/test_codecencodings_cn.py
index dca9f10..ee3d165 100644
--- a/Lib/test/test_codecencodings_cn.py
+++ b/Lib/test/test_codecencodings_cn.py
@@ -15,8 +15,8 @@
         # invalid bytes
         (b"abc\x81\x81\xc1\xc4", "strict",  None),
         (b"abc\xc8", "strict",  None),
-        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
-        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
+        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
         (b"abc\x81\x81\xc1\xc4", "ignore",  "abc\u804a"),
         (b"\xc1\x64", "strict", None),
     )
@@ -28,8 +28,8 @@
         # invalid bytes
         (b"abc\x80\x80\xc1\xc4", "strict",  None),
         (b"abc\xc8", "strict",  None),
-        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
-        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
         (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
         (b"\x83\x34\x83\x31", "strict", None),
         ("\u30fb", "strict", None),
@@ -42,11 +42,14 @@
         # invalid bytes
         (b"abc\x80\x80\xc1\xc4", "strict",  None),
         (b"abc\xc8", "strict",  None),
-        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
-        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
         (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
-        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
+        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
         ("\u30fb", "strict", b"\x819\xa79"),
+        (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
+        (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
+        (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
     )
     has_iso10646 = True
 
@@ -74,9 +77,11 @@
          '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
          'Bye.\n'),
         # invalid bytes
-        (b'ab~cd', 'replace', 'ab\uFFFDd'),
+        (b'ab~cd', 'replace', 'ab\uFFFDcd'),
         (b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
         (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
+        (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
+        (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
     )
 
 def test_main():
diff --git a/Lib/test/test_codecencodings_hk.py b/Lib/test/test_codecencodings_hk.py
index ccdc0b4..520df43 100644
--- a/Lib/test/test_codecencodings_hk.py
+++ b/Lib/test/test_codecencodings_hk.py
@@ -15,8 +15,8 @@
         # invalid bytes
         (b"abc\x80\x80\xc1\xc4", "strict",  None),
         (b"abc\xc8", "strict",  None),
-        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
-        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"),
         (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u8b10"),
     )
 
diff --git a/Lib/test/test_codecencodings_jp.py b/Lib/test/test_codecencodings_jp.py
index f56a373..87e4812 100644
--- a/Lib/test/test_codecencodings_jp.py
+++ b/Lib/test/test_codecencodings_jp.py
@@ -15,50 +15,57 @@
         # invalid bytes
         (b"abc\x81\x00\x81\x00\x82\x84", "strict",  None),
         (b"abc\xf8", "strict",  None),
-        (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
-        (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
-        (b"abc\x81\x00\x82\x84", "ignore",  "abc\uff44"),
+        (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
+        (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
+        (b"abc\x81\x00\x82\x84", "ignore",  "abc\x00\uff44"),
+        (b"ab\xEBxy", "replace", "ab\uFFFDxy"),
+        (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
+        (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),
         # sjis vs cp932
         (b"\\\x7e", "replace", "\\\x7e"),
         (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
     )
 
+euc_commontests = (
+    # invalid bytes
+    (b"abc\x80\x80\xc1\xc4", "strict",  None),
+    (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
+    (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
+    (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u7956"),
+    (b"abc\xc8", "strict",  None),
+    (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
+    (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
+    (b"\xc1\x64", "strict", None),
+    (b"\xa1\xc0", "strict", "\uff3c"),
+    (b"\xa1\xc0\\", "strict", "\uff3c\\"),
+    (b"\x8eXY", "replace", "\ufffdXY"),
+)
+
+class Test_EUC_JIS_2004(test_multibytecodec_support.TestBase,
+                        unittest.TestCase):
+    encoding = 'euc_jis_2004'
+    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
+    codectests = euc_commontests
+    xmlcharnametest = (
+        "\xab\u211c\xbb = \u2329\u1234\u232a",
+        b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩"
+    )
+
 class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
     encoding = 'euc_jisx0213'
     tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
-    codectests = (
-        # invalid bytes
-        (b"abc\x80\x80\xc1\xc4", "strict",  None),
-        (b"abc\xc8", "strict",  None),
-        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
-        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
-        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u7956"),
-        (b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
-        (b"\xc1\x64", "strict", None),
-        (b"\xa1\xc0", "strict", "\uff3c"),
-    )
+    codectests = euc_commontests
     xmlcharnametest = (
         "\xab\u211c\xbb = \u2329\u1234\u232a",
         b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩"
     )
 
-eucjp_commontests = (
-    (b"abc\x80\x80\xc1\xc4", "strict",  None),
-    (b"abc\xc8", "strict",  None),
-    (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
-    (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
-    (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u7956"),
-    (b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
-    (b"\xc1\x64", "strict", None),
-)
-
 class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                          unittest.TestCase):
     encoding = 'euc_jp'
     tstring = test_multibytecodec_support.load_teststring('euc_jp')
-    codectests = eucjp_commontests + (
-        (b"\xa1\xc0\\", "strict", "\uff3c\\"),
+    codectests = euc_commontests + (
         ("\xa5", "strict", b"\x5c"),
         ("\u203e", "strict", b"\x7e"),
     )
@@ -66,8 +73,6 @@
 shiftjis_commonenctests = (
     (b"abc\x80\x80\x82\x84", "strict",  None),
     (b"abc\xf8", "strict",  None),
-    (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
-    (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
     (b"abc\x80\x80\x82\x84def", "ignore",  "abc\uff44def"),
 )
 
@@ -75,20 +80,41 @@
     encoding = 'shift_jis'
     tstring = test_multibytecodec_support.load_teststring('shift_jis')
     codectests = shiftjis_commonenctests + (
+        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
+        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
+
         (b"\\\x7e", "strict", "\\\x7e"),
         (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
+        (b"abc\x81\x39", "replace",  "abc\ufffd9"),
+        (b"abc\xEA\xFC", "replace",  "abc\ufffd\ufffd"),
+        (b"abc\xFF\x58", "replace",  "abc\ufffdX"),
+    )
+
+class Test_SJIS_2004(test_multibytecodec_support.TestBase, unittest.TestCase):
+    encoding = 'shift_jis_2004'
+    tstring = test_multibytecodec_support.load_teststring('shift_jis')
+    codectests = shiftjis_commonenctests + (
+        (b"\\\x7e", "strict", "\xa5\u203e"),
+        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
+        (b"abc\xEA\xFC", "strict",  "abc\u64bf"),
+        (b"\x81\x39xy", "replace",  "\ufffd9xy"),
+        (b"\xFF\x58xy", "replace",  "\ufffdXxy"),
+        (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
+        (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
+        (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
+    )
+    xmlcharnametest = (
+        "\xab\u211c\xbb = \u2329\u1234\u232a",
+        b"\x85Gℜ\x85Q = ⟨ሴ⟩"
     )
 
 class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
     encoding = 'shift_jisx0213'
     tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
-    codectests = (
-        # invalid bytes
-        (b"abc\x80\x80\x82\x84", "strict",  None),
-        (b"abc\xf8", "strict",  None),
-        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
-        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
-        (b"abc\x80\x80\x82\x84def", "ignore",  "abc\uff44def"),
+    codectests = shiftjis_commonenctests + (
+        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
+        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
+
         # sjis vs cp932
         (b"\\\x7e", "replace", "\xa5\u203e"),
         (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
diff --git a/Lib/test/test_codecencodings_kr.py b/Lib/test/test_codecencodings_kr.py
index de4da7f..4997e83 100644
--- a/Lib/test/test_codecencodings_kr.py
+++ b/Lib/test/test_codecencodings_kr.py
@@ -15,8 +15,8 @@
         # invalid bytes
         (b"abc\x80\x80\xc1\xc4", "strict",  None),
         (b"abc\xc8", "strict",  None),
-        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
-        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\uc894"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
         (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\uc894"),
     )
 
@@ -27,8 +27,8 @@
         # invalid bytes
         (b"abc\x80\x80\xc1\xc4", "strict",  None),
         (b"abc\xc8", "strict",  None),
-        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
-        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "replace", 'abc\ufffd\ufffd\uc894'),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
         (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\uc894"),
 
         # composed make-up sequence errors
@@ -40,13 +40,14 @@
         (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None),
         (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", "\uc4d4"),
         (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", "\uc4d4x"),
-        (b"a\xa4\xd4\xa4\xb6\xa4", "replace", "a\ufffd"),
+        (b"a\xa4\xd4\xa4\xb6\xa4", "replace", 'a\ufffd'),
         (b"\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None),
         (b"\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None),
         (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None),
-        (b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", "\ufffd"),
-        (b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", "\ufffd"),
-        (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", "\ufffd"),
+        (b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", '\ufffd\u6e21\ufffd\u3160\ufffd'),
+        (b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", '\ufffd\u6e21\ub544\ufffd\ufffd'),
+        (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", '\ufffd\u6e21\ub544\u572d\ufffd'),
+        (b"\xa4\xd4\xff\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "replace", '\ufffd\ufffd\ufffd\uc4d4'),
         (b"\xc1\xc4", "strict", "\uc894"),
     )
 
@@ -57,9 +58,13 @@
         # invalid bytes
         (b"abc\x80\x80\xc1\xc4", "strict",  None),
         (b"abc\xc8", "strict",  None),
-        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
-        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\ucd27"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\ucd27\ufffd"),
         (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\ucd27"),
+        (b"\xD8abc", "replace",  "\uFFFDabc"),
+        (b"\xD8\xFFabc", "replace",  "\uFFFD\uFFFDabc"),
+        (b"\x84bxy", "replace",  "\uFFFDbxy"),
+        (b"\x8CBxy", "replace",  "\uFFFDBxy"),
     )
 
 def test_main():
diff --git a/Lib/test/test_codecencodings_tw.py b/Lib/test/test_codecencodings_tw.py
index 12d3c9f..f2f3c18 100644
--- a/Lib/test/test_codecencodings_tw.py
+++ b/Lib/test/test_codecencodings_tw.py
@@ -15,8 +15,8 @@
         # invalid bytes
         (b"abc\x80\x80\xc1\xc4", "strict",  None),
         (b"abc\xc8", "strict",  None),
-        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
-        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
+        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"),
+        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"),
         (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u8b10"),
     )
 
diff --git a/Lib/test/test_codecmaps_tw.py b/Lib/test/test_codecmaps_tw.py
index 6db5091..412b9de 100644
--- a/Lib/test/test_codecmaps_tw.py
+++ b/Lib/test/test_codecmaps_tw.py
@@ -23,6 +23,9 @@
         (b'\xa2\xcc', '\u5341'),
         (b'\xa2\xce', '\u5345'),
     ]
+    codectests = (
+        (b"\xFFxy", "replace",  "\ufffdxy"),
+    )
 
 def test_main():
     support.run_unittest(__name__)