#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#

from test import support
from test import multibytecodec_support
import unittest

class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    # Test case for the 'gb2312' codec.  The class attributes below are
    # consumed by multibytecodec_support.TestBase, which lives outside this
    # file.
    encoding = 'gb2312'
    tstring = multibytecodec_support.load_teststring('gb2312')
    # NOTE(review): each entry looks like (input, error-handler name,
    # expected result), with None presumably meaning "must raise" --
    # confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x81\x81\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
        # \xc1 followed by an ASCII byte (0x64 == 'd')
        (b"\xc1\x64", "strict", None),
    )

class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
    # Test case for the 'gbk' codec.  The class attributes below are
    # consumed by multibytecodec_support.TestBase, which lives outside this
    # file.
    encoding = 'gbk'
    tstring = multibytecodec_support.load_teststring('gbk')
    # NOTE(review): each entry looks like (input, error-handler name,
    # expected result), with None presumably meaning "must raise" --
    # confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (b"\x83\x34\x83\x31", "strict", None),
        # The only str (not bytes) input in this table.
        ("\u30fb", "strict", None),
    )

class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    # Test case for the 'gb18030' codec.  The class attributes below are
    # consumed by multibytecodec_support.TestBase, which lives outside this
    # file.
    encoding = 'gb18030'
    tstring = multibytecodec_support.load_teststring('gb18030')
    # NOTE(review): each entry looks like (input, error-handler name,
    # expected result), with None presumably meaning "must raise" --
    # confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
        # The only str (not bytes) input in this table; paired with a
        # four-byte result.
        ("\u30fb", "strict", b"\x819\xa79"),
        (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
        (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
        (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
    )
    # NOTE(review): flag read by code outside this file (presumably
    # TestBase) -- confirm exactly which checks it enables.
    has_iso10646 = True

class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
    # Test case for the 'hz' codec.  The class attributes below are
    # consumed by multibytecodec_support.TestBase, which lives outside this
    # file.
    encoding = 'hz'
    tstring = multibytecodec_support.load_teststring('hz')
    # NOTE(review): each entry looks like (input, error-handler name,
    # expected result) -- confirm against TestBase.
    codectests = (
        # test '~\n' (3 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # invalid bytes
        (b'ab~cd', 'replace', 'ab\uFFFDcd'),
        (b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
        (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
        (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
    )

Hye-Shik Chang | 3e2a306 | 2004-01-17 14:29:29 +0000 | [diff] [blame] | 86 | def test_main(): |
Benjamin Peterson | ee8712c | 2008-05-20 21:35:26 +0000 | [diff] [blame] | 87 | support.run_unittest(__name__) |
Hye-Shik Chang | 3e2a306 | 2004-01-17 14:29:29 +0000 | [diff] [blame] | 88 | |
| 89 | if __name__ == "__main__": |
| 90 | test_main() |