#!/usr/bin/env python3
#
# test_codecencodings_jp.py
# Codec encoding tests for Japanese encodings.
#

from test import support
from test import multibytecodec_support
import unittest
10
class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``cp932`` encoding."""

    encoding = 'cp932'
    # Sample text comes from the shared helper; note that the 'shift_jis'
    # sample is used here rather than a cp932-specific one.
    tstring = multibytecodec_support.load_teststring('shift_jis')
    # Each entry is (input, error-handler name, expected result).
    # NOTE(review): a None expectation presumably means the conversion must
    # fail under that handler -- confirm in multibytecodec_support.TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
        (b"abc\xf8", "strict", None),
        (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
        (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
        (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"),
        (b"ab\xEBxy", "replace", "ab\uFFFDxy"),
        (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
        (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),
        # sjis vs cp932
        (b"\\\x7e", "replace", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
    )
28
# Decode cases shared by the EUC-family test classes below.
# Each entry is (input bytes, error-handler name, expected text).
# NOTE(review): a None expectation presumably means decoding must fail under
# that handler -- confirm in multibytecodec_support.TestBase.
euc_commontests = (
    # invalid bytes
    (b"abc\x80\x80\xc1\xc4", "strict", None),
    (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
    (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
    (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
    (b"abc\xc8", "strict", None),
    (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
    (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
    (b"\xc1\x64", "strict", None),
    (b"\xa1\xc0", "strict", "\uff3c"),
    (b"\xa1\xc0\\", "strict", "\uff3c\\"),
    (b"\x8eXY", "replace", "\ufffdXY"),
)
43
class Test_EUC_JIS_2004(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test data for the ``euc_jis_2004`` encoding."""

    encoding = 'euc_jis_2004'
    # The sample text is the 'euc_jisx0213' one, shared with Test_EUC_JISX0213.
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests
    # (text, expected bytes) pair.  Characters that have no encoding in this
    # codec appear in the expected bytes as character references (&real;,
    # &lang;, &#4660; for U+1234, &rang;).  They must be written out in ASCII
    # because a bytes literal cannot contain non-ASCII characters.
    # NOTE(review): presumably consumed by a TestBase check that registers a
    # name-replacing error handler -- confirm in multibytecodec_support.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )
53
class Test_EUC_JISX0213(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test data for the ``euc_jisx0213`` encoding."""

    encoding = 'euc_jisx0213'
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests
    # (text, expected bytes) pair.  Characters that have no encoding in this
    # codec appear in the expected bytes as character references (&real;,
    # &lang;, &#4660; for U+1234, &rang;).  They must be written out in ASCII
    # because a bytes literal cannot contain non-ASCII characters.
    # NOTE(review): presumably consumed by a TestBase check that registers a
    # name-replacing error handler -- confirm in multibytecodec_support.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )
63
class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Codec test data for the ``euc_jp`` encoding."""

    encoding = 'euc_jp'
    tstring = multibytecodec_support.load_teststring('euc_jp')
    # Shared EUC decode cases plus two entries whose input is text and whose
    # expectation is bytes: U+00A5 -> 0x5C and U+203E -> 0x7E.
    codectests = euc_commontests + (
        ("\xa5", "strict", b"\x5c"),
        ("\u203e", "strict", b"\x7e"),
    )
72
# Decode cases shared by the Shift_JIS-family test classes below.
# Each entry is (input bytes, error-handler name, expected text).
# NOTE(review): a None expectation presumably means decoding must fail under
# that handler -- confirm in multibytecodec_support.TestBase.
shiftjis_commonenctests = (
    (b"abc\x80\x80\x82\x84", "strict", None),
    (b"abc\xf8", "strict", None),
    (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
)
78
class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``shift_jis`` encoding."""

    encoding = 'shift_jis'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    # Shared Shift_JIS cases followed by cases specific to this codec.
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        (b"\\\x7e", "strict", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
        (b"abc\x81\x39", "replace", "abc\ufffd9"),
        (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"),
        (b"abc\xFF\x58", "replace", "abc\ufffdX"),
    )
92
class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``shift_jis_2004`` encoding."""

    encoding = 'shift_jis_2004'
    # Note: the sample text loaded here is the plain 'shift_jis' one.
    tstring = multibytecodec_support.load_teststring('shift_jis')
    # Shared Shift_JIS cases followed by cases specific to this codec.
    codectests = shiftjis_commonenctests + (
        (b"\\\x7e", "strict", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
        (b"abc\xEA\xFC", "strict", "abc\u64bf"),
        (b"\x81\x39xy", "replace", "\ufffd9xy"),
        (b"\xFF\x58xy", "replace", "\ufffdXxy"),
        (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
        (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
        (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
    )
    # (text, expected bytes) pair.  Characters that have no encoding in this
    # codec appear in the expected bytes as character references (&real;,
    # &lang;, &#4660; for U+1234, &rang;).  They must be written out in ASCII
    # because a bytes literal cannot contain non-ASCII characters.
    # NOTE(review): presumably consumed by a TestBase check that registers a
    # name-replacing error handler -- confirm in multibytecodec_support.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
110
class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``shift_jisx0213`` encoding."""

    encoding = 'shift_jisx0213'
    tstring = multibytecodec_support.load_teststring('shift_jisx0213')
    # Shared Shift_JIS cases followed by cases specific to this codec.
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        # sjis vs cp932
        (b"\\\x7e", "replace", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
    )
    # (text, expected bytes) pair.  Characters that have no encoding in this
    # codec appear in the expected bytes as character references (&real;,
    # &lang;, &#4660; for U+1234, &rang;).  They must be written out in ASCII
    # because a bytes literal cannot contain non-ASCII characters.
    # NOTE(review): presumably consumed by a TestBase check that registers a
    # name-replacing error handler -- confirm in multibytecodec_support.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
126
def test_main():
    """Pass this module's name to ``support.run_unittest`` to run its tests."""
    support.run_unittest(__name__)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000129
# Allow running this test file directly as a script.
if __name__ == "__main__":
    test_main()