blob: b51b489706c5f9d03871500f35db0f358fc8347f [file] [log] [blame]
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001import test.test_support, unittest
2import sys, codecs, htmlentitydefs, unicodedata
3
4class CodecCallbackTest(unittest.TestCase):
5
6 def test_xmlcharrefreplace(self):
7 # replace unencodable characters which numeric character entities.
8 # For ascii, latin-1 and charmaps this is completely implemented
9 # in C and should be reasonably fast.
10 s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
11 self.assertEqual(
12 s.encode("ascii", "xmlcharrefreplace"),
13 "スパモ änd eggs"
14 )
15 self.assertEqual(
16 s.encode("latin-1", "xmlcharrefreplace"),
17 "スパモ \xe4nd eggs"
18 )
19
20 def test_xmlcharnamereplace(self):
21 # This time use a named character entity for unencodable
22 # characters, if one is available.
23 names = {}
24 for (key, value) in htmlentitydefs.entitydefs.items():
25 if len(value)==1:
26 names[unicode(value, "latin-1")] = unicode(key, "latin-1")
27 else:
28 names[unichr(int(value[2:-1]))] = unicode(key, "latin-1")
29
30 def xmlcharnamereplace(exc):
31 if not isinstance(exc, UnicodeEncodeError):
32 raise TypeError("don't know how to handle %r" % exc)
33 l = []
34 for c in exc.object[exc.start:exc.end]:
35 try:
36 l.append(u"&%s;" % names[c])
37 except KeyError:
38 l.append(u"&#%d;" % ord(c))
39 return (u"".join(l), exc.end)
40
41 codecs.register_error(
42 "test.xmlcharnamereplace", xmlcharnamereplace)
43
44 sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
45 sout = "«ℜ» = ⟨ሴ€⟩"
46 self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
47 sout = "\xabℜ\xbb = ⟨ሴ€⟩"
48 self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
49 sout = "\xabℜ\xbb = ⟨ሴ\xa4⟩"
50 self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
51
52 def test_uninamereplace(self):
53 # We're using the names from the unicode database this time,
Walter Dörwald00445d22002-11-25 17:58:02 +000054 # and we're doing "syntax highlighting" here, i.e. we include
Walter Dörwald3aeb6322002-09-02 13:14:32 +000055 # the replaced text in ANSI escape sequences. For this it is
56 # useful that the error handler is not called for every single
57 # unencodable character, but for a complete sequence of
58 # unencodable characters, otherwise we would output many
59 # unneccessary escape sequences.
60
61 def uninamereplace(exc):
62 if not isinstance(exc, UnicodeEncodeError):
63 raise TypeError("don't know how to handle %r" % exc)
64 l = []
65 for c in exc.object[exc.start:exc.end]:
66 l.append(unicodedata.name(c, u"0x%x" % ord(c)))
67 return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
68
69 codecs.register_error(
70 "test.uninamereplace", uninamereplace)
71
72 sin = u"\xac\u1234\u20ac\u8000"
Martin v. Löwis74a530d2002-11-23 19:41:01 +000073 sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
Walter Dörwald3aeb6322002-09-02 13:14:32 +000074 self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
75
Martin v. Löwis74a530d2002-11-23 19:41:01 +000076 sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
Walter Dörwald3aeb6322002-09-02 13:14:32 +000077 self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
78
Martin v. Löwis74a530d2002-11-23 19:41:01 +000079 sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
Walter Dörwald3aeb6322002-09-02 13:14:32 +000080 self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
81
82 def test_backslashescape(self):
83 # Does the same as the "unicode-escape" encoding, but with different
84 # base encodings.
85 sin = u"a\xac\u1234\u20ac\u8000"
86 if sys.maxunicode > 0xffff:
87 sin += unichr(sys.maxunicode)
88 sout = "a\\xac\\u1234\\u20ac\\u8000"
89 if sys.maxunicode > 0xffff:
90 sout += "\\U%08x" % sys.maxunicode
91 self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
92
93 sout = "a\xac\\u1234\\u20ac\\u8000"
94 if sys.maxunicode > 0xffff:
95 sout += "\\U%08x" % sys.maxunicode
96 self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
97
98 sout = "a\xac\\u1234\xa4\\u8000"
99 if sys.maxunicode > 0xffff:
100 sout += "\\U%08x" % sys.maxunicode
101 self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
102
103 def test_relaxedutf8(self):
104 # This is the test for a decoding callback handler,
105 # that relaxes the UTF-8 minimal encoding restriction.
106 # A null byte that is encoded as "\xc0\x80" will be
107 # decoded as a null byte. All other illegal sequences
108 # will be handled strictly.
109 def relaxedutf8(exc):
110 if not isinstance(exc, UnicodeDecodeError):
111 raise TypeError("don't know how to handle %r" % exc)
112 if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
113 return (u"\x00", exc.start+2) # retry after two bytes
114 else:
115 raise exc
116
117 codecs.register_error(
118 "test.relaxedutf8", relaxedutf8)
119
120 sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
121 sout = u"a\x00b\x00c\xfc\x00\x00"
122 self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
123 sin = "\xc0\x80\xc0\x81"
124 self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
125
126 def test_charmapencode(self):
127 # For charmap encodings the replacement string will be
128 # mapped through the encoding again. This means, that
129 # to be able to use e.g. the "replace" handler, the
130 # charmap has to have a mapping for "?".
131 charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
132 sin = u"abc"
133 sout = "AABBCC"
134 self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
135
136 sin = u"abcA"
137 self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
138
139 charmap[ord("?")] = "XYZ"
140 sin = u"abcDEF"
141 sout = "AABBCCXYZXYZXYZ"
142 self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
143
144 charmap[ord("?")] = u"XYZ"
145 self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
146
147 charmap[ord("?")] = u"XYZ"
148 self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
149
150 def test_callbacks(self):
151 def handler1(exc):
152 if not isinstance(exc, UnicodeEncodeError) \
153 and not isinstance(exc, UnicodeDecodeError):
154 raise TypeError("don't know how to handle %r" % exc)
155 l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
156 return (u"[%s]" % u"".join(l), exc.end)
157
158 codecs.register_error("test.handler1", handler1)
159
160 def handler2(exc):
161 if not isinstance(exc, UnicodeDecodeError):
162 raise TypeError("don't know how to handle %r" % exc)
163 l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
164 return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
165
166 codecs.register_error("test.handler2", handler2)
167
168 s = "\x00\x81\x7f\x80\xff"
169
170 self.assertEqual(
171 s.decode("ascii", "test.handler1"),
172 u"\x00[<129>]\x7f[<128>][<255>]"
173 )
174 self.assertEqual(
175 s.decode("ascii", "test.handler2"),
176 u"\x00[<129>][<128>]"
177 )
178
179 self.assertEqual(
180 "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
181 u"\u3042[<92><117><51><120>]xx"
182 )
183
184 self.assertEqual(
185 "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
186 u"\u3042[<92><117><51><120><120>]"
187 )
188
189 self.assertEqual(
190 codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
191 u"z[<98>][<99>]"
192 )
193
194 self.assertEqual(
195 u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
196 u"g[<252><223>]rk"
197 )
198
199 self.assertEqual(
200 u"g\xfc\xdf".encode("ascii", "test.handler1"),
201 u"g[<252><223>]"
202 )
203
204 def test_longstrings(self):
205 # test long strings to check for memory overflow problems
206 errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"]
207 # register the handlers under different names,
208 # to prevent the codec from recognizing the name
209 for err in errors:
210 codecs.register_error("test." + err, codecs.lookup_error(err))
211 l = 1000
212 errors += [ "test." + err for err in errors ]
213 for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
214 for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"):
215 for err in errors:
Tim Peters3de75262002-11-09 05:26:15 +0000216 try:
217 uni.encode(enc, err)
218 except UnicodeError:
219 pass
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000220
221 def check_exceptionobjectargs(self, exctype, args, msg):
222 # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
223 # check with one missing argument
224 self.assertRaises(TypeError, exctype, *args[:-1])
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000225 # check with one argument too much
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000226 self.assertRaises(TypeError, exctype, *(args + ["too much"]))
227 # check with one argument of the wrong type
228 wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
229 for i in xrange(len(args)):
230 for wrongarg in wrongargs:
231 if type(wrongarg) is type(args[i]):
Tim Peters3de75262002-11-09 05:26:15 +0000232 continue
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000233 # build argument array
234 callargs = []
235 for j in xrange(len(args)):
236 if i==j:
237 callargs.append(wrongarg)
238 else:
239 callargs.append(args[i])
240 self.assertRaises(TypeError, exctype, *callargs)
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000241
242 # check with the correct number and type of arguments
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000243 exc = exctype(*args)
244 self.assertEquals(str(exc), msg)
245
246 def test_unicodeencodeerror(self):
247 self.check_exceptionobjectargs(
248 UnicodeEncodeError,
249 ["ascii", u"g\xfcrk", 1, 2, "ouch"],
250 "'ascii' codec can't encode character '\ufc' in position 1: ouch"
251 )
252 self.check_exceptionobjectargs(
253 UnicodeEncodeError,
254 ["ascii", u"g\xfcrk", 1, 4, "ouch"],
255 "'ascii' codec can't encode characters in position 1-3: ouch"
256 )
257 self.check_exceptionobjectargs(
258 UnicodeEncodeError,
259 ["ascii", u"\xfcx", 0, 1, "ouch"],
260 "'ascii' codec can't encode character '\ufc' in position 0: ouch"
261 )
262
263 def test_unicodedecodeerror(self):
264 self.check_exceptionobjectargs(
265 UnicodeDecodeError,
266 ["ascii", "g\xfcrk", 1, 2, "ouch"],
267 "'ascii' codec can't decode byte 0xfc in position 1: ouch"
268 )
269 self.check_exceptionobjectargs(
270 UnicodeDecodeError,
271 ["ascii", "g\xfcrk", 1, 3, "ouch"],
272 "'ascii' codec can't decode bytes in position 1-2: ouch"
273 )
274
275 def test_unicodetranslateerror(self):
276 self.check_exceptionobjectargs(
277 UnicodeTranslateError,
278 [u"g\xfcrk", 1, 2, "ouch"],
279 "can't translate character '\\ufc' in position 1: ouch"
280 )
281 self.check_exceptionobjectargs(
282 UnicodeTranslateError,
283 [u"g\xfcrk", 1, 3, "ouch"],
284 "can't translate characters in position 1-2: ouch"
285 )
286
287 def test_badandgoodstrictexceptions(self):
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000288 # "strict" complains about a non-exception passed in
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000289 self.assertRaises(
290 TypeError,
291 codecs.strict_errors,
292 42
293 )
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000294 # "strict" complains about the wrong exception type
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000295 self.assertRaises(
296 Exception,
297 codecs.strict_errors,
298 Exception("ouch")
299 )
300
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000301 # If the correct exception is passed in, "strict" raises it
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000302 self.assertRaises(
303 UnicodeEncodeError,
304 codecs.strict_errors,
305 UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
306 )
307
308 def test_badandgoodignoreexceptions(self):
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000309 # "ignore" complains about a non-exception passed in
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000310 self.assertRaises(
311 TypeError,
312 codecs.ignore_errors,
313 42
314 )
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000315 # "ignore" complains about the wrong exception type
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000316 self.assertRaises(
317 TypeError,
318 codecs.ignore_errors,
319 UnicodeError("ouch")
320 )
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000321 # If the correct exception is passed in, "ignore" returns an empty replacement
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000322 self.assertEquals(
323 codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
324 (u"", 1)
325 )
326 self.assertEquals(
327 codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
328 (u"", 1)
329 )
330 self.assertEquals(
331 codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
332 (u"", 1)
333 )
334
335 def test_badandgoodreplaceexceptions(self):
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000336 # "replace" complains about a non-exception passed in
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000337 self.assertRaises(
338 TypeError,
339 codecs.replace_errors,
340 42
341 )
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000342 # "replace" complains about the wrong exception type
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000343 self.assertRaises(
344 TypeError,
345 codecs.replace_errors,
346 UnicodeError("ouch")
347 )
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000348 # With the correct exception, "ignore" returns an empty replacement
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000349 self.assertEquals(
350 codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
351 (u"?", 1)
352 )
353 self.assertEquals(
354 codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
355 (u"\ufffd", 1)
356 )
357 self.assertEquals(
358 codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
359 (u"\ufffd", 1)
360 )
361
362 def test_badandgoodxmlcharrefreplaceexceptions(self):
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000363 # "xmlcharrefreplace" complains about a non-exception passed in
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000364 self.assertRaises(
365 TypeError,
366 codecs.xmlcharrefreplace_errors,
367 42
368 )
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000369 # "xmlcharrefreplace" complains about the wrong exception types
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000370 self.assertRaises(
371 TypeError,
372 codecs.xmlcharrefreplace_errors,
373 UnicodeError("ouch")
374 )
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000375 # "xmlcharrefreplace" can only be used for encoding
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000376 self.assertRaises(
377 TypeError,
378 codecs.xmlcharrefreplace_errors,
379 UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
380 )
381 self.assertRaises(
382 TypeError,
383 codecs.xmlcharrefreplace_errors,
384 UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
385 )
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000386 # Use the correct exception
387 self.assertEquals(
388 codecs.xmlcharrefreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
389 (u"&#%d;" % 0x3042, 1)
390 )
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000391
392 def test_badandgoodbackslashreplaceexceptions(self):
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000393 # "backslashreplace" complains about a non-exception passed in
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000394 self.assertRaises(
395 TypeError,
396 codecs.backslashreplace_errors,
397 42
398 )
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000399 # "backslashreplace" complains about the wrong exception types
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000400 self.assertRaises(
401 TypeError,
402 codecs.backslashreplace_errors,
403 UnicodeError("ouch")
404 )
Walter Dörwaldea4250d2003-01-20 02:34:07 +0000405 # "backslashreplace" can only be used for encoding
406 self.assertRaises(
407 TypeError,
408 codecs.backslashreplace_errors,
409 UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
410 )
411 self.assertRaises(
412 TypeError,
413 codecs.backslashreplace_errors,
414 UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
415 )
416 # Use the correct exception
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000417 self.assertEquals(
418 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
419 (u"\\u3042", 1)
420 )
421 self.assertEquals(
422 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
423 (u"\\x00", 1)
424 )
425 self.assertEquals(
426 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
427 (u"\\xff", 1)
428 )
429 self.assertEquals(
430 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
431 (u"\\u0100", 1)
432 )
433 self.assertEquals(
434 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
435 (u"\\uffff", 1)
436 )
437 if sys.maxunicode>0xffff:
438 self.assertEquals(
439 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
440 (u"\\U00010000", 1)
441 )
442 self.assertEquals(
443 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
444 (u"\\U0010ffff", 1)
445 )
446
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000447 def test_badhandlerresults(self):
448 results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
449 encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
450
451 for res in results:
452 codecs.register_error("test.badhandler", lambda: res)
453 for enc in encs:
454 self.assertRaises(
455 TypeError,
456 u"\u3042".encode,
457 enc,
458 "test.badhandler"
459 )
460 for (enc, bytes) in (
461 ("ascii", "\xff"),
462 ("utf-8", "\xff"),
463 ("utf-7", "+x-")
464 ):
465 self.assertRaises(
466 TypeError,
467 bytes.decode,
468 enc,
469 "test.badhandler"
470 )
471
472 def test_lookup(self):
473 self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
474 self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
475 self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
476 self.assertEquals(
477 codecs.xmlcharrefreplace_errors,
478 codecs.lookup_error("xmlcharrefreplace")
479 )
480 self.assertEquals(
481 codecs.backslashreplace_errors,
482 codecs.lookup_error("backslashreplace")
483 )
484
Walter Dörwald9ab7dd42002-09-06 17:21:40 +0000485 def test_unencodablereplacement(self):
486 def unencrepl(exc):
487 if isinstance(exc, UnicodeEncodeError):
488 return (u"\u4242", exc.end)
489 else:
490 raise TypeError("don't know how to handle %r" % exc)
491 codecs.register_error("test.unencreplhandler", unencrepl)
492 for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
493 self.assertRaises(
494 UnicodeEncodeError,
495 u"\u4242".encode,
496 enc,
497 "test.unencreplhandler"
498 )
499
Walter Dörwald30537a42003-01-08 23:22:13 +0000500 def test_badregistercall(self):
501 # enhance coverage of:
502 # Modules/_codecsmodule.c::register_error()
503 # Python/codecs.c::PyCodec_RegisterError()
504 self.assertRaises(TypeError, codecs.register_error, 42)
505 self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
506
507 def test_unknownhandler(self):
508 # enhance coverage of:
509 # Modules/_codecsmodule.c::lookup_error()
510 self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
511
512 def test_xmlcharrefvalues(self):
513 # enhance coverage of:
514 # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
515 # and inline implementations
516 v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
Walter Dörwald0cb27dd2003-01-09 11:38:50 +0000517 if sys.maxunicode>=100000:
Walter Dörwald30537a42003-01-08 23:22:13 +0000518 v += (100000, 500000, 1000000)
519 s = u"".join([unichr(x) for x in v])
520 codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
521 for enc in ("ascii", "iso-8859-15"):
522 for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
523 s.encode(enc, err)
524
525 def test_decodehelper(self):
526 # enhance coverage of:
527 # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
528 # and callers
529 self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown")
530
531 def baddecodereturn1(exc):
532 return 42
533 codecs.register_error("test.baddecodereturn1", baddecodereturn1)
534 self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1")
535 self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1")
536 self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1")
537 self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
538 self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
539 self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")
540
541 def baddecodereturn2(exc):
542 return (u"?", None)
543 codecs.register_error("test.baddecodereturn2", baddecodereturn2)
544 self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")
545
546 pos = [-42]
547 def negposreturn(exc):
548 pos[0] += 1 # use list to work around scoping problem
549 return (u"?", pos[0])
550 codecs.register_error("test.negposreturn", negposreturn)
551 "\xff".decode("ascii", "test.negposreturn")
552
553 def hugeposreturn(exc):
554 return (u"?", 424242)
555 codecs.register_error("test.hugeposreturn", hugeposreturn)
556 "\xff".decode("ascii", "test.hugeposreturn")
557 "\\uyyyy".decode("raw-unicode-escape", "test.hugeposreturn")
558
559 class D(dict):
560 def __getitem__(self, key):
561 raise ValueError
562 self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None})
563 self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D())
564 self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: sys.maxunicode+1})
565
566 def test_encodehelper(self):
567 # enhance coverage of:
568 # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
569 # and callers
570 self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")
571
572 def badencodereturn1(exc):
573 return 42
574 codecs.register_error("test.badencodereturn1", badencodereturn1)
575 self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")
576
577 def badencodereturn2(exc):
578 return (u"?", None)
579 codecs.register_error("test.badencodereturn2", badencodereturn2)
580 self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")
581
582 pos = [-42]
583 def negposreturn(exc):
584 pos[0] += 1 # use list to work around scoping problem
585 return (u"?", pos[0])
586 codecs.register_error("test.negposreturn", negposreturn)
587 u"\xff".encode("ascii", "test.negposreturn")
588
589 def hugeposreturn(exc):
590 return (u"?", 424242)
591 codecs.register_error("test.hugeposreturn", hugeposreturn)
592 u"\xff".encode("ascii", "test.hugeposreturn")
593
594 class D(dict):
595 def __getitem__(self, key):
596 raise ValueError
597 for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.hugeposreturn"):
598 self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None})
599 self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D())
600 self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300})
601
602 def test_translatehelper(self):
603 # enhance coverage of:
604 # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
605 # and callers
606 # (Unfortunately the errors argument is not directly accessible
607 # from Python, so we can't test that much)
608 class D(dict):
609 def __getitem__(self, key):
610 raise ValueError
611 self.assertRaises(ValueError, u"\xff".translate, D())
612 self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
613 self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
614
def test_main():
    # Collect all test methods of CodecCallbackTest into a suite and
    # hand it to the regression test driver.
    callback_tests = unittest.makeSuite(CodecCallbackTest)
    suite = unittest.TestSuite()
    suite.addTest(callback_tests)
    test.test_support.run_suite(suite)

# Allow running this file directly as a script.
if __name__ == "__main__":
    test_main()