#!/usr/bin/env python3
#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#

from test import support
from test import multibytecodec_support
import unittest

class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for the 'gb2312' encoding."""

    # Name of the codec under test.
    encoding = 'gb2312'
    # Reference sample loaded by the shared helper for this codec.
    tstring = multibytecodec_support.load_teststring('gb2312')
    # Each row is a 3-tuple. NOTE(review): presumably
    # (input, error-handler name, expected result), with None marking input
    # that must be rejected -- confirm against multibytecodec_support.TestBase,
    # which consumes this table and is not visible in this file.
    codectests = (
        # invalid bytes
        (b"abc\x81\x81\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
        (b"\xc1\x64", "strict", None),
    )

class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for the 'gbk' encoding."""

    # Name of the codec under test.
    encoding = 'gbk'
    # Reference sample loaded by the shared helper for this codec.
    tstring = multibytecodec_support.load_teststring('gbk')
    # Each row is a 3-tuple. NOTE(review): presumably
    # (input, error-handler name, expected result), with None marking input
    # that must be rejected -- confirm against multibytecodec_support.TestBase,
    # which consumes this table and is not visible in this file.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (b"\x83\x34\x83\x31", "strict", None),
        # The only row whose input is text rather than bytes.
        ("\u30fb", "strict", None),
    )

class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for the 'gb18030' encoding."""

    # Name of the codec under test.
    encoding = 'gb18030'
    # Reference sample loaded by the shared helper for this codec.
    tstring = multibytecodec_support.load_teststring('gb18030')
    # Each row is a 3-tuple. NOTE(review): presumably
    # (input, error-handler name, expected result), with None marking input
    # that must be rejected -- confirm against multibytecodec_support.TestBase,
    # which consumes this table and is not visible in this file.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
        # Text input paired with a four-byte expected value.
        ("\u30fb", "strict", b"\x819\xa79"),
        (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
        (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
        (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
    )
    # Flag read by the shared base class; its exact effect is defined there.
    has_iso10646 = True

class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for the 'hz' encoding."""

    # Name of the codec under test.
    encoding = 'hz'
    # Reference sample loaded by the shared helper for this codec.
    tstring = multibytecodec_support.load_teststring('hz')
    # Each row is a 3-tuple. NOTE(review): presumably
    # (input, error-handler name, expected result) -- confirm against
    # multibytecodec_support.TestBase, which consumes this table and is not
    # visible in this file.
    codectests = (
        # test '~\n' (3 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # invalid bytes
        (b'ab~cd', 'replace', 'ab\uFFFDcd'),
        (b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
        (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
        (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
    )

def test_main():
    """Pass this module's name to ``support.run_unittest``."""
    support.run_unittest(__name__)

# Entry point when the file is executed directly as a script.
if __name__ == "__main__":
    test_main()