#!/usr/bin/env python
#
# test_multibytecodec.py
# Unit test for multibytecodec itself
#
# $CJKCodecs: test_multibytecodec.py,v 1.8 2004/06/19 06:09:55 perky Exp $

import codecs
import StringIO
import unittest

from test import test_multibytecodec_support
from test import test_support

Hye-Shik Change2ac4ab2006-03-26 02:34:59 +000012class Test_MultibyteCodec(unittest.TestCase):
13
14 def test_nullcoding(self):
15 self.assertEqual(''.decode('gb18030'), u'')
16 self.assertEqual(unicode('', 'gb18030'), u'')
17 self.assertEqual(u''.encode('gb18030'), '')
18
19 def test_str_decode(self):
20 self.assertEqual('abcd'.encode('gb18030'), 'abcd')
21
22
23class Test_IncrementalEncoder(unittest.TestCase):
24
25 def test_stateless(self):
26 # cp949 encoder isn't stateful at all.
27 encoder = codecs.getincrementalencoder('cp949')()
28 self.assertEqual(encoder.encode(u'\ud30c\uc774\uc36c \ub9c8\uc744'),
29 '\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
30 self.assertEqual(encoder.reset(), None)
31 self.assertEqual(encoder.encode(u'\u2606\u223c\u2606', True),
32 '\xa1\xd9\xa1\xad\xa1\xd9')
33 self.assertEqual(encoder.reset(), None)
34 self.assertEqual(encoder.encode(u'', True), '')
35 self.assertEqual(encoder.encode(u'', False), '')
36 self.assertEqual(encoder.reset(), None)
37
38 def test_stateful(self):
39 # jisx0213 encoder is stateful for a few codepoints. eg)
40 # U+00E6 => A9DC
41 # U+00E6 U+0300 => ABC4
42 # U+0300 => ABDC
43
44 encoder = codecs.getincrementalencoder('jisx0213')()
45 self.assertEqual(encoder.encode(u'\u00e6\u0300'), '\xab\xc4')
46 self.assertEqual(encoder.encode(u'\u00e6'), '')
47 self.assertEqual(encoder.encode(u'\u0300'), '\xab\xc4')
48 self.assertEqual(encoder.encode(u'\u00e6', True), '\xa9\xdc')
49
50 self.assertEqual(encoder.reset(), None)
51 self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
52
53 self.assertEqual(encoder.encode(u'\u00e6'), '')
54 self.assertEqual(encoder.encode('', True), '\xa9\xdc')
55 self.assertEqual(encoder.encode('', True), '')
56
57 def test_stateful_keep_buffer(self):
58 encoder = codecs.getincrementalencoder('jisx0213')()
59 self.assertEqual(encoder.encode(u'\u00e6'), '')
60 self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
61 self.assertEqual(encoder.encode(u'\u0300\u00e6'), '\xab\xc4')
62 self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
63 self.assertEqual(encoder.reset(), None)
64 self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
65 self.assertEqual(encoder.encode(u'\u00e6'), '')
66 self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
67 self.assertEqual(encoder.encode(u'', True), '\xa9\xdc')
68
69
70class Test_IncrementalDecoder(unittest.TestCase):
71
72 def test_dbcs(self):
73 # cp949 decoder is simple with only 1 or 2 bytes sequences.
74 decoder = codecs.getincrementaldecoder('cp949')()
75 self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'),
76 u'\ud30c\uc774')
77 self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'),
78 u'\uc36c \ub9c8\uc744')
79 self.assertEqual(decoder.decode(''), u'')
80
81 def test_dbcs_keep_buffer(self):
82 decoder = codecs.getincrementaldecoder('cp949')()
83 self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
84 self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
85 self.assertEqual(decoder.decode('\xcc'), u'\uc774')
86
87 self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
88 self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True)
89 self.assertEqual(decoder.decode('\xcc'), u'\uc774')
90
91 def test_iso2022(self):
92 decoder = codecs.getincrementaldecoder('iso2022-jp')()
93 ESC = '\x1b'
94 self.assertEqual(decoder.decode(ESC + '('), u'')
95 self.assertEqual(decoder.decode('B', True), u'')
96 self.assertEqual(decoder.decode(ESC + '$'), u'')
97 self.assertEqual(decoder.decode('B@$'), u'\u4e16')
98 self.assertEqual(decoder.decode('@$@'), u'\u4e16')
99 self.assertEqual(decoder.decode('$', True), u'\u4e16')
100 self.assertEqual(decoder.reset(), None)
101 self.assertEqual(decoder.decode('@$'), u'@$')
102 self.assertEqual(decoder.decode(ESC + '$'), u'')
103 self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
104 self.assertEqual(decoder.decode('B@$'), u'\u4e16')
105
106
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000107class Test_StreamWriter(unittest.TestCase):
108 if len(u'\U00012345') == 2: # UCS2
109 def test_gb18030(self):
110 s= StringIO.StringIO()
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000111 c = codecs.getwriter('gb18030')(s)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000112 c.write(u'123')
113 self.assertEqual(s.getvalue(), '123')
114 c.write(u'\U00012345')
115 self.assertEqual(s.getvalue(), '123\x907\x959')
116 c.write(u'\U00012345'[0])
117 self.assertEqual(s.getvalue(), '123\x907\x959')
118 c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
119 self.assertEqual(s.getvalue(),
120 '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
121 c.write(u'\U00012345'[0])
122 self.assertEqual(s.getvalue(),
123 '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
124 self.assertRaises(UnicodeError, c.reset)
125 self.assertEqual(s.getvalue(),
126 '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
127
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000128 def test_utf_8(self):
129 s= StringIO.StringIO()
130 c = codecs.getwriter('utf-8')(s)
131 c.write(u'123')
132 self.assertEqual(s.getvalue(), '123')
133 c.write(u'\U00012345')
134 self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
135
136 # Python utf-8 codec can't buffer surrogate pairs yet.
137 if 0:
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000138 c.write(u'\U00012345'[0])
139 self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
140 c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
141 self.assertEqual(s.getvalue(),
142 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
143 '\xea\xb0\x80\xc2\xac')
144 c.write(u'\U00012345'[0])
145 self.assertEqual(s.getvalue(),
146 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
147 '\xea\xb0\x80\xc2\xac')
148 c.reset()
149 self.assertEqual(s.getvalue(),
150 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
151 '\xea\xb0\x80\xc2\xac\xed\xa0\x88')
152 c.write(u'\U00012345'[1])
153 self.assertEqual(s.getvalue(),
154 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
155 '\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
156
157 else: # UCS4
158 pass
159
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000160 def test_streamwriter_strwrite(self):
161 s = StringIO.StringIO()
162 wr = codecs.getwriter('gb18030')(s)
163 wr.write('abcd')
164 self.assertEqual(s.getvalue(), 'abcd')
165
Hye-Shik Changabb903f2006-03-13 10:20:08 +0000166class Test_ISO2022(unittest.TestCase):
167 def test_g2(self):
168 iso2022jp2 = '\x1b(B:hu4:unit\x1b.A\x1bNi de famille'
169 uni = u':hu4:unit\xe9 de famille'
170 self.assertEqual(iso2022jp2.decode('iso2022-jp-2'), uni)
171
def test_main():
    """Collect every test case class of this module and run the suite.

    The suite is handed to ``test_support.run_suite``; the classes are
    added in the same order in which they are defined in this file.
    """
    suite = unittest.TestSuite()
    for case in (Test_MultibyteCodec,
                 Test_IncrementalEncoder,
                 Test_IncrementalDecoder,
                 Test_StreamWriter,
                 Test_ISO2022):
        suite.addTest(unittest.makeSuite(case))
    test_support.run_suite(suite)

# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()