blob: 94378d124f7485d5c6c291e48c511c0af4df8eb5 [file] [log] [blame]
#
# test_codecencodings_jp.py
#   Codec encoding tests for Japanese encodings.
#
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00005
from test import multibytecodec_support
import unittest
8
class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for the 'cp932' encoding.

    The actual test methods come from ``multibytecodec_support.TestBase``;
    this class only supplies the data they consume.
    """

    encoding = 'cp932'
    # cp932 reuses the 'shift_jis' sample text as its reference string.
    tstring = multibytecodec_support.load_teststring('shift_jis')
    # Each entry is (input, error-handler name, expected result).
    # NOTE(review): an expected value of None appears only with "strict" on
    # invalid input, so it presumably means "must raise" -- confirm against
    # multibytecodec_support.TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
        (b"abc\xf8", "strict", None),
        (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
        (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
        (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"),
        (b"ab\xEBxy", "replace", "ab\uFFFDxy"),
        (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
        (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),
        # sjis vs cp932
        (b"\\\x7e", "replace", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
    )
26
# Test vectors shared by the EUC-family test classes in this file
# (they assign it to, or extend it in, their ``codectests`` attribute).
# Each entry is (input bytes, error-handler name, expected text).
# NOTE(review): an expected value of None appears only with "strict" on
# invalid input, so it presumably means "must raise" -- confirm against
# multibytecodec_support.TestBase.
euc_commontests = (
    # invalid bytes
    (b"abc\x80\x80\xc1\xc4", "strict", None),
    (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
    (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
    (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
    (b"abc\xc8", "strict", None),
    (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
    (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
    (b"\xc1\x64", "strict", None),
    (b"\xa1\xc0", "strict", "\uff3c"),
    (b"\xa1\xc0\\", "strict", "\uff3c\\"),
    (b"\x8eXY", "replace", "\ufffdXY"),
)
41
class Test_EUC_JIS_2004(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test case for the 'euc_jis_2004' encoding.

    The actual test methods come from ``multibytecodec_support.TestBase``;
    this class only supplies the data they consume.
    """

    encoding = 'euc_jis_2004'
    # Uses the 'euc_jisx0213' sample text as its reference string.
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests
    # (text, expected bytes) pair.  The expected bytes must stay pure ASCII
    # plus \x escapes: the unencodable characters are spelled out as XML
    # character references (&real;, &lang;, &#4660;, &rang;), not as the
    # characters themselves, which would be a syntax error in a bytes literal.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )
51
class Test_EUC_JISX0213(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test case for the 'euc_jisx0213' encoding.

    The actual test methods come from ``multibytecodec_support.TestBase``;
    this class only supplies the data they consume.
    """

    encoding = 'euc_jisx0213'
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests
    # (text, expected bytes) pair.  The expected bytes must stay pure ASCII
    # plus \x escapes: the unencodable characters are spelled out as XML
    # character references (&real;, &lang;, &#4660;, &rang;), not as the
    # characters themselves, which would be a syntax error in a bytes literal.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )
61
class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Codec test case for the 'euc_jp' encoding.

    The actual test methods come from ``multibytecodec_support.TestBase``;
    this class only supplies the data they consume.
    """

    encoding = 'euc_jp'
    tstring = multibytecodec_support.load_teststring('euc_jp')
    # The shared EUC vectors plus two entries whose input is text rather than
    # bytes: U+00A5 and U+203E are expected to come out as the single bytes
    # 0x5C and 0x7E respectively.
    codectests = euc_commontests + (
        ("\xa5", "strict", b"\x5c"),
        ("\u203e", "strict", b"\x7e"),
    )
70
# Test vectors shared by the Shift_JIS-family test classes in this file
# (each of them extends this tuple in its ``codectests`` attribute).
# Each entry is (input bytes, error-handler name, expected text).
# NOTE(review): an expected value of None appears only with "strict" on
# invalid input, so it presumably means "must raise" -- confirm against
# multibytecodec_support.TestBase.
shiftjis_commonenctests = (
    (b"abc\x80\x80\x82\x84", "strict", None),
    (b"abc\xf8", "strict", None),
    (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
)
76
class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for the 'shift_jis' encoding.

    The actual test methods come from ``multibytecodec_support.TestBase``;
    this class only supplies the data they consume.
    """

    encoding = 'shift_jis'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    # The shared Shift_JIS vectors plus codec-specific ones.
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        # Same input bytes as the "sjis vs cp932" entries of the cp932 test;
        # the expected text here is what plain shift_jis yields.
        (b"\\\x7e", "strict", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
        (b"abc\x81\x39", "replace", "abc\ufffd9"),
        (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"),
        (b"abc\xFF\x58", "replace", "abc\ufffdX"),
    )
90
class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for the 'shift_jis_2004' encoding.

    The actual test methods come from ``multibytecodec_support.TestBase``;
    this class only supplies the data they consume.
    """

    encoding = 'shift_jis_2004'
    # NOTE(review): loads the plain 'shift_jis' sample text, whereas the
    # shift_jisx0213 test class loads 'shift_jisx0213' -- kept as found;
    # confirm this is intentional.
    tstring = multibytecodec_support.load_teststring('shift_jis')
    # The shared Shift_JIS vectors plus codec-specific ones.
    codectests = shiftjis_commonenctests + (
        (b"\\\x7e", "strict", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
        (b"abc\xEA\xFC", "strict", "abc\u64bf"),
        (b"\x81\x39xy", "replace", "\ufffd9xy"),
        (b"\xFF\x58xy", "replace", "\ufffdXxy"),
        (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
        (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
        (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
    )
    # (text, expected bytes) pair.  The expected bytes must stay pure ASCII
    # plus \x escapes: the unencodable characters are spelled out as XML
    # character references (&real;, &lang;, &#4660;, &rang;), not as the
    # characters themselves, which would be a syntax error in a bytes literal.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
108
class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for the 'shift_jisx0213' encoding.

    The actual test methods come from ``multibytecodec_support.TestBase``;
    this class only supplies the data they consume.
    """

    encoding = 'shift_jisx0213'
    tstring = multibytecodec_support.load_teststring('shift_jisx0213')
    # The shared Shift_JIS vectors plus codec-specific ones.
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        # sjis vs cp932
        (b"\\\x7e", "replace", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
    )
    # (text, expected bytes) pair.  The expected bytes must stay pure ASCII
    # plus \x escapes: the unencodable characters are spelled out as XML
    # character references (&real;, &lang;, &#4660;, &rang;), not as the
    # characters themselves, which would be a syntax error in a bytes literal.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
124
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()