blob: 276b9af0bb86bf62f96372cd6a715ee79f3a66d3 [file] [log] [blame]
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001#!/usr/bin/env python
2#
3# test_multibytecodec.py
4# Unit test for multibytecodec itself
5#
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00006
7from test import test_support
8from test import test_multibytecodec_support
Hye-Shik Chang9f4b6322006-03-26 06:21:34 +00009import unittest, StringIO, codecs, sys
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000010
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +000011class Test_MultibyteCodec(unittest.TestCase):
12
13 def test_nullcoding(self):
14 self.assertEqual(''.decode('gb18030'), u'')
15 self.assertEqual(unicode('', 'gb18030'), u'')
16 self.assertEqual(u''.encode('gb18030'), '')
17
18 def test_str_decode(self):
19 self.assertEqual('abcd'.encode('gb18030'), 'abcd')
20
Hye-Shik Chang9f4b6322006-03-26 06:21:34 +000021 def test_errorcallback_longindex(self):
22 dec = codecs.getdecoder('euc-kr')
23 myreplace = lambda exc: (u'', sys.maxint+1)
24 codecs.register_error('test.cjktest', myreplace)
25 self.assertRaises(IndexError, dec,
26 'apple\x92ham\x93spam', 'test.cjktest')
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +000027
28class Test_IncrementalEncoder(unittest.TestCase):
29
30 def test_stateless(self):
31 # cp949 encoder isn't stateful at all.
32 encoder = codecs.getincrementalencoder('cp949')()
33 self.assertEqual(encoder.encode(u'\ud30c\uc774\uc36c \ub9c8\uc744'),
34 '\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
35 self.assertEqual(encoder.reset(), None)
36 self.assertEqual(encoder.encode(u'\u2606\u223c\u2606', True),
37 '\xa1\xd9\xa1\xad\xa1\xd9')
38 self.assertEqual(encoder.reset(), None)
39 self.assertEqual(encoder.encode(u'', True), '')
40 self.assertEqual(encoder.encode(u'', False), '')
41 self.assertEqual(encoder.reset(), None)
42
43 def test_stateful(self):
44 # jisx0213 encoder is stateful for a few codepoints. eg)
45 # U+00E6 => A9DC
46 # U+00E6 U+0300 => ABC4
47 # U+0300 => ABDC
48
49 encoder = codecs.getincrementalencoder('jisx0213')()
50 self.assertEqual(encoder.encode(u'\u00e6\u0300'), '\xab\xc4')
51 self.assertEqual(encoder.encode(u'\u00e6'), '')
52 self.assertEqual(encoder.encode(u'\u0300'), '\xab\xc4')
53 self.assertEqual(encoder.encode(u'\u00e6', True), '\xa9\xdc')
54
55 self.assertEqual(encoder.reset(), None)
56 self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
57
58 self.assertEqual(encoder.encode(u'\u00e6'), '')
59 self.assertEqual(encoder.encode('', True), '\xa9\xdc')
60 self.assertEqual(encoder.encode('', True), '')
61
62 def test_stateful_keep_buffer(self):
63 encoder = codecs.getincrementalencoder('jisx0213')()
64 self.assertEqual(encoder.encode(u'\u00e6'), '')
65 self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
66 self.assertEqual(encoder.encode(u'\u0300\u00e6'), '\xab\xc4')
67 self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
68 self.assertEqual(encoder.reset(), None)
69 self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
70 self.assertEqual(encoder.encode(u'\u00e6'), '')
71 self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
72 self.assertEqual(encoder.encode(u'', True), '\xa9\xdc')
73
74
75class Test_IncrementalDecoder(unittest.TestCase):
76
77 def test_dbcs(self):
78 # cp949 decoder is simple with only 1 or 2 bytes sequences.
79 decoder = codecs.getincrementaldecoder('cp949')()
80 self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'),
81 u'\ud30c\uc774')
82 self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'),
83 u'\uc36c \ub9c8\uc744')
84 self.assertEqual(decoder.decode(''), u'')
85
86 def test_dbcs_keep_buffer(self):
87 decoder = codecs.getincrementaldecoder('cp949')()
88 self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
89 self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
90 self.assertEqual(decoder.decode('\xcc'), u'\uc774')
91
92 self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
93 self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True)
94 self.assertEqual(decoder.decode('\xcc'), u'\uc774')
95
96 def test_iso2022(self):
97 decoder = codecs.getincrementaldecoder('iso2022-jp')()
98 ESC = '\x1b'
99 self.assertEqual(decoder.decode(ESC + '('), u'')
100 self.assertEqual(decoder.decode('B', True), u'')
101 self.assertEqual(decoder.decode(ESC + '$'), u'')
102 self.assertEqual(decoder.decode('B@$'), u'\u4e16')
103 self.assertEqual(decoder.decode('@$@'), u'\u4e16')
104 self.assertEqual(decoder.decode('$', True), u'\u4e16')
105 self.assertEqual(decoder.reset(), None)
106 self.assertEqual(decoder.decode('@$'), u'@$')
107 self.assertEqual(decoder.decode(ESC + '$'), u'')
108 self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
109 self.assertEqual(decoder.decode('B@$'), u'\u4e16')
110
111
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000112class Test_StreamWriter(unittest.TestCase):
113 if len(u'\U00012345') == 2: # UCS2
114 def test_gb18030(self):
115 s= StringIO.StringIO()
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000116 c = codecs.getwriter('gb18030')(s)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000117 c.write(u'123')
118 self.assertEqual(s.getvalue(), '123')
119 c.write(u'\U00012345')
120 self.assertEqual(s.getvalue(), '123\x907\x959')
121 c.write(u'\U00012345'[0])
122 self.assertEqual(s.getvalue(), '123\x907\x959')
123 c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
124 self.assertEqual(s.getvalue(),
125 '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
126 c.write(u'\U00012345'[0])
127 self.assertEqual(s.getvalue(),
128 '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
129 self.assertRaises(UnicodeError, c.reset)
130 self.assertEqual(s.getvalue(),
131 '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
132
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000133 def test_utf_8(self):
134 s= StringIO.StringIO()
135 c = codecs.getwriter('utf-8')(s)
136 c.write(u'123')
137 self.assertEqual(s.getvalue(), '123')
138 c.write(u'\U00012345')
139 self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
140
141 # Python utf-8 codec can't buffer surrogate pairs yet.
142 if 0:
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000143 c.write(u'\U00012345'[0])
144 self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
145 c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
146 self.assertEqual(s.getvalue(),
147 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
148 '\xea\xb0\x80\xc2\xac')
149 c.write(u'\U00012345'[0])
150 self.assertEqual(s.getvalue(),
151 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
152 '\xea\xb0\x80\xc2\xac')
153 c.reset()
154 self.assertEqual(s.getvalue(),
155 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
156 '\xea\xb0\x80\xc2\xac\xed\xa0\x88')
157 c.write(u'\U00012345'[1])
158 self.assertEqual(s.getvalue(),
159 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
160 '\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
161
162 else: # UCS4
163 pass
164
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000165 def test_streamwriter_strwrite(self):
166 s = StringIO.StringIO()
167 wr = codecs.getwriter('gb18030')(s)
168 wr.write('abcd')
169 self.assertEqual(s.getvalue(), 'abcd')
170
Hye-Shik Changabb903f2006-03-13 10:20:08 +0000171class Test_ISO2022(unittest.TestCase):
172 def test_g2(self):
173 iso2022jp2 = '\x1b(B:hu4:unit\x1b.A\x1bNi de famille'
174 uni = u':hu4:unit\xe9 de famille'
175 self.assertEqual(iso2022jp2.decode('iso2022-jp-2'), uni)
176
def test_main():
    """Gather the test case classes of this module into one suite and run it.

    The assembled suite is passed to ``test_support.run_suite``.
    """
    # Order matches the order in which the classes are added to the suite.
    case_classes = (
        Test_MultibyteCodec,
        Test_IncrementalEncoder,
        Test_IncrementalDecoder,
        Test_StreamWriter,
        Test_ISO2022,
    )
    suite = unittest.TestSuite()
    for case_class in case_classes:
        suite.addTest(unittest.makeSuite(case_class))
    test_support.run_suite(suite)
185
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    test_main()