# Source blob: 44b63a00c8b4c493047cefd6c19e4e7e6b523df4
#
# test_codecencodings_jp.py
# Codec encoding tests for Japanese encodings.
#

import unittest
from test import multibytecodec_support
from test import support

class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``cp932`` encoding.

    Each ``codectests`` row is ``(input, error-handler name, expected)``.
    The rows are consumed by ``multibytecodec_support.TestBase``, which is
    not visible here; a ``None`` expectation presumably means the
    conversion must fail -- confirm against ``TestBase``.
    """

    encoding = 'cp932'
    # cp932 reuses the 'shift_jis' sample text.
    tstring = multibytecodec_support.load_teststring('shift_jis')
    codectests = (
        # invalid bytes
        (b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
        (b"abc\xf8", "strict", None),
        (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
        (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
        (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"),
        (b"ab\xEBxy", "replace", "ab\uFFFDxy"),
        (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
        (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),
        # sjis vs cp932
        (b"\\\x7e", "replace", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
    )

# Rows shared by the EUC-family test classes in this file.
# Each row is (input bytes, error-handler name, expected text); the rows with
# a None expectation all use the "strict" handler.
euc_commontests = (
    # invalid bytes
    (b"abc\x80\x80\xc1\xc4", "strict", None),
    (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
    (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
    (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
    (b"abc\xc8", "strict", None),
    (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
    (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
    (b"\xc1\x64", "strict", None),
    (b"\xa1\xc0", "strict", "\uff3c"),
    (b"\xa1\xc0\\", "strict", "\uff3c\\"),
    (b"\x8eXY", "replace", "\ufffdXY"),
)

class Test_EUC_JIS_2004(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test data for the ``euc_jis_2004`` encoding.

    Reuses the shared ``euc_commontests`` rows and the ``euc_jisx0213``
    sample text.
    """

    encoding = 'euc_jis_2004'
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests
    # (text, expected bytes) pair.  The expected bytes must be pure ASCII
    # plus escapes: the entity references are spelled out literally
    # (&real; &lang; &#4660; &rang;), not as the characters they denote.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )

class Test_EUC_JISX0213(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test data for the ``euc_jisx0213`` encoding.

    Reuses the shared ``euc_commontests`` rows.
    """

    encoding = 'euc_jisx0213'
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests
    # (text, expected bytes) pair.  The expected bytes must be pure ASCII
    # plus escapes: the entity references are spelled out literally
    # (&real; &lang; &#4660; &rang;), not as the characters they denote.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )

class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Codec test data for the ``euc_jp`` encoding.

    Extends the shared ``euc_commontests`` rows with two rows whose input is
    text and whose expected value is bytes (the other rows go from bytes to
    text).
    """

    encoding = 'euc_jp'
    tstring = multibytecodec_support.load_teststring('euc_jp')
    codectests = euc_commontests + (
        ("\xa5", "strict", b"\x5c"),
        ("\u203e", "strict", b"\x7e"),
    )

# Rows shared by the Shift_JIS-family test classes in this file.
# Each row is (input bytes, error-handler name, expected text or None).
shiftjis_commonenctests = (
    (b"abc\x80\x80\x82\x84", "strict", None),
    (b"abc\xf8", "strict", None),
    (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
)

class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``shift_jis`` encoding.

    Extends the shared ``shiftjis_commonenctests`` rows with rows specific
    to this encoding.
    """

    encoding = 'shift_jis'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        (b"\\\x7e", "strict", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
        (b"abc\x81\x39", "replace", "abc\ufffd9"),
        (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"),
        (b"abc\xFF\x58", "replace", "abc\ufffdX"),
    )

class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``shift_jis_2004`` encoding.

    Extends the shared ``shiftjis_commonenctests`` rows; the sample text is
    loaded under the ``shift_jis`` name.
    """

    encoding = 'shift_jis_2004'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        (b"\\\x7e", "strict", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
        (b"abc\xEA\xFC", "strict", "abc\u64bf"),
        (b"\x81\x39xy", "replace", "\ufffd9xy"),
        (b"\xFF\x58xy", "replace", "\ufffdXxy"),
        (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
        (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
        (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
    )
    # (text, expected bytes) pair.  The expected bytes must be pure ASCII
    # plus escapes: the entity references are spelled out literally
    # (&real; &lang; &#4660; &rang;), not as the characters they denote.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )

class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``shift_jisx0213`` encoding.

    Extends the shared ``shiftjis_commonenctests`` rows with rows specific
    to this encoding.
    """

    encoding = 'shift_jisx0213'
    tstring = multibytecodec_support.load_teststring('shift_jisx0213')
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        # sjis vs cp932
        (b"\\\x7e", "replace", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
    )
    # (text, expected bytes) pair.  The expected bytes must be pure ASCII
    # plus escapes: the entity references are spelled out literally
    # (&real; &lang; &#4660; &rang;), not as the characters they denote.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )

# Run the test classes above when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()