| Benjamin Peterson | ee8712c | 2008-05-20 21:35:26 +0000 | [diff] [blame] | 1 | import test.support, unittest | 
| Fred Drake | 3c50ea4 | 2008-05-17 22:02:32 +0000 | [diff] [blame] | 2 | import sys, codecs, html.entities, unicodedata | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 3 |  | 
class PosReturn:
    # Error callback with a configurable position: ``handle`` always
    # answers with the replacement "<?>" and the value currently stored
    # in ``pos``.

    def __init__(self):
        self.pos = 0

    def handle(self, exc):
        requested = self.pos
        # A negative position is taken relative to the end of the input.
        if requested < 0:
            absolute = len(exc.object) + requested
        else:
            absolute = requested
        # if we don't advance this time, terminate on the next call
        # otherwise we'd get an endless loop
        if absolute <= exc.start:
            self.pos = len(exc.object)
        return ("<?>", requested)
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 20 |  | 
# UnicodeEncodeError whose "start" attribute is overwritten with a list
class BadStartUnicodeEncodeError(UnicodeEncodeError):
    def __init__(self):
        super().__init__("ascii", "", 0, 1, "bad")
        # deliberately not an integer
        self.start = []
 | 26 |  | 
# UnicodeEncodeError whose "object" attribute is overwritten with a list
class BadObjectUnicodeEncodeError(UnicodeEncodeError):
    def __init__(self):
        super().__init__("ascii", "", 0, 1, "bad")
        # deliberately not a string
        self.object = []
 | 32 |  | 
 | 33 | # A UnicodeDecodeError object without an end attribute | 
 | 34 | class NoEndUnicodeDecodeError(UnicodeDecodeError): | 
 | 35 |     def __init__(self): | 
| Guido van Rossum | 254348e | 2007-11-21 19:29:53 +0000 | [diff] [blame] | 36 |         UnicodeDecodeError.__init__(self, "ascii", bytearray(b""), 0, 1, "bad") | 
| Walter Dörwald | 690402f | 2005-11-17 18:51:34 +0000 | [diff] [blame] | 37 |         del self.end | 
 | 38 |  | 
 | 39 | # A UnicodeDecodeError object with a bad object attribute | 
 | 40 | class BadObjectUnicodeDecodeError(UnicodeDecodeError): | 
 | 41 |     def __init__(self): | 
| Guido van Rossum | 254348e | 2007-11-21 19:29:53 +0000 | [diff] [blame] | 42 |         UnicodeDecodeError.__init__(self, "ascii", bytearray(b""), 0, 1, "bad") | 
| Walter Dörwald | 690402f | 2005-11-17 18:51:34 +0000 | [diff] [blame] | 43 |         self.object = [] | 
 | 44 |  | 
 | 45 | # A UnicodeTranslateError object without a start attribute | 
 | 46 | class NoStartUnicodeTranslateError(UnicodeTranslateError): | 
 | 47 |     def __init__(self): | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 48 |         UnicodeTranslateError.__init__(self, "", 0, 1, "bad") | 
| Walter Dörwald | 690402f | 2005-11-17 18:51:34 +0000 | [diff] [blame] | 49 |         del self.start | 
 | 50 |  | 
 | 51 | # A UnicodeTranslateError object without an end attribute | 
 | 52 | class NoEndUnicodeTranslateError(UnicodeTranslateError): | 
 | 53 |     def __init__(self): | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 54 |         UnicodeTranslateError.__init__(self,  "", 0, 1, "bad") | 
| Walter Dörwald | 690402f | 2005-11-17 18:51:34 +0000 | [diff] [blame] | 55 |         del self.end | 
 | 56 |  | 
 | 57 | # A UnicodeTranslateError object without an object attribute | 
 | 58 | class NoObjectUnicodeTranslateError(UnicodeTranslateError): | 
 | 59 |     def __init__(self): | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 60 |         UnicodeTranslateError.__init__(self, "", 0, 1, "bad") | 
| Walter Dörwald | 690402f | 2005-11-17 18:51:34 +0000 | [diff] [blame] | 61 |         del self.object | 
 | 62 |  | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 63 | class CodecCallbackTest(unittest.TestCase): | 
 | 64 |  | 
 | 65 |     def test_xmlcharrefreplace(self): | 
 | 66 |         # replace unencodable characters which numeric character entities. | 
 | 67 |         # For ascii, latin-1 and charmaps this is completely implemented | 
 | 68 |         # in C and should be reasonably fast. | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 69 |         s = "\u30b9\u30d1\u30e2 \xe4nd eggs" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 70 |         self.assertEqual( | 
 | 71 |             s.encode("ascii", "xmlcharrefreplace"), | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 72 |             b"スパモ änd eggs" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 73 |         ) | 
 | 74 |         self.assertEqual( | 
 | 75 |             s.encode("latin-1", "xmlcharrefreplace"), | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 76 |             b"スパモ \xe4nd eggs" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 77 |         ) | 
 | 78 |  | 
 | 79 |     def test_xmlcharnamereplace(self): | 
 | 80 |         # This time use a named character entity for unencodable | 
 | 81 |         # characters, if one is available. | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 82 |  | 
 | 83 |         def xmlcharnamereplace(exc): | 
 | 84 |             if not isinstance(exc, UnicodeEncodeError): | 
 | 85 |                 raise TypeError("don't know how to handle %r" % exc) | 
 | 86 |             l = [] | 
 | 87 |             for c in exc.object[exc.start:exc.end]: | 
 | 88 |                 try: | 
| Fred Drake | 3c50ea4 | 2008-05-17 22:02:32 +0000 | [diff] [blame] | 89 |                     l.append("&%s;" % html.entities.codepoint2name[ord(c)]) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 90 |                 except KeyError: | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 91 |                     l.append("&#%d;" % ord(c)) | 
 | 92 |             return ("".join(l), exc.end) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 93 |  | 
 | 94 |         codecs.register_error( | 
 | 95 |             "test.xmlcharnamereplace", xmlcharnamereplace) | 
 | 96 |  | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 97 |         sin = "\xab\u211c\xbb = \u2329\u1234\u20ac\u232a" | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 98 |         sout = b"«ℜ» = ⟨ሴ€⟩" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 99 |         self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout) | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 100 |         sout = b"\xabℜ\xbb = ⟨ሴ€⟩" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 101 |         self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout) | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 102 |         sout = b"\xabℜ\xbb = ⟨ሴ\xa4⟩" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 103 |         self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout) | 
 | 104 |  | 
 | 105 |     def test_uninamereplace(self): | 
 | 106 |         # We're using the names from the unicode database this time, | 
| Walter Dörwald | 00445d2 | 2002-11-25 17:58:02 +0000 | [diff] [blame] | 107 |         # and we're doing "syntax highlighting" here, i.e. we include | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 108 |         # the replaced text in ANSI escape sequences. For this it is | 
 | 109 |         # useful that the error handler is not called for every single | 
 | 110 |         # unencodable character, but for a complete sequence of | 
 | 111 |         # unencodable characters, otherwise we would output many | 
| Mark Dickinson | 934896d | 2009-02-21 20:59:32 +0000 | [diff] [blame] | 112 |         # unnecessary escape sequences. | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 113 |  | 
 | 114 |         def uninamereplace(exc): | 
 | 115 |             if not isinstance(exc, UnicodeEncodeError): | 
 | 116 |                 raise TypeError("don't know how to handle %r" % exc) | 
 | 117 |             l = [] | 
 | 118 |             for c in exc.object[exc.start:exc.end]: | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 119 |                 l.append(unicodedata.name(c, "0x%x" % ord(c))) | 
 | 120 |             return ("\033[1m%s\033[0m" % ", ".join(l), exc.end) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 121 |  | 
 | 122 |         codecs.register_error( | 
 | 123 |             "test.uninamereplace", uninamereplace) | 
 | 124 |  | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 125 |         sin = "\xac\u1234\u20ac\u8000" | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 126 |         sout = b"\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 127 |         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout) | 
 | 128 |  | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 129 |         sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 130 |         self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout) | 
 | 131 |  | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 132 |         sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 133 |         self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout) | 
 | 134 |  | 
 | 135 |     def test_backslashescape(self): | 
 | 136 |         # Does the same as the "unicode-escape" encoding, but with different | 
 | 137 |         # base encodings. | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 138 |         sin = "a\xac\u1234\u20ac\u8000" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 139 |         if sys.maxunicode > 0xffff: | 
| Guido van Rossum | 84fc66d | 2007-05-03 17:18:26 +0000 | [diff] [blame] | 140 |             sin += chr(sys.maxunicode) | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 141 |         sout = b"a\\xac\\u1234\\u20ac\\u8000" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 142 |         if sys.maxunicode > 0xffff: | 
| Guido van Rossum | 3172c5d | 2007-10-16 18:12:55 +0000 | [diff] [blame] | 143 |             sout += bytes("\\U%08x" % sys.maxunicode, "ascii") | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 144 |         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout) | 
 | 145 |  | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 146 |         sout = b"a\xac\\u1234\\u20ac\\u8000" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 147 |         if sys.maxunicode > 0xffff: | 
| Guido van Rossum | 3172c5d | 2007-10-16 18:12:55 +0000 | [diff] [blame] | 148 |             sout += bytes("\\U%08x" % sys.maxunicode, "ascii") | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 149 |         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout) | 
 | 150 |  | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 151 |         sout = b"a\xac\\u1234\xa4\\u8000" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 152 |         if sys.maxunicode > 0xffff: | 
| Guido van Rossum | 3172c5d | 2007-10-16 18:12:55 +0000 | [diff] [blame] | 153 |             sout += bytes("\\U%08x" % sys.maxunicode, "ascii") | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 154 |         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) | 
 | 155 |  | 
| Walter Dörwald | a47d1c0 | 2005-08-30 10:23:14 +0000 | [diff] [blame] | 156 |     def test_decoderelaxedutf8(self): | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 157 |         # This is the test for a decoding callback handler, | 
 | 158 |         # that relaxes the UTF-8 minimal encoding restriction. | 
 | 159 |         # A null byte that is encoded as "\xc0\x80" will be | 
 | 160 |         # decoded as a null byte. All other illegal sequences | 
 | 161 |         # will be handled strictly. | 
 | 162 |         def relaxedutf8(exc): | 
 | 163 |             if not isinstance(exc, UnicodeDecodeError): | 
 | 164 |                 raise TypeError("don't know how to handle %r" % exc) | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 165 |             if exc.object[exc.start:exc.end].startswith(b"\xc0\x80"): | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 166 |                 return ("\x00", exc.start+2) # retry after two bytes | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 167 |             else: | 
 | 168 |                 raise exc | 
 | 169 |  | 
 | 170 |         codecs.register_error( | 
 | 171 |             "test.relaxedutf8", relaxedutf8) | 
 | 172 |  | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 173 |         sin = b"a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80" | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 174 |         sout = "a\x00b\x00c\xfc\x00\x00" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 175 |         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout) | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 176 |         sin = b"\xc0\x80\xc0\x81" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 177 |         self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8") | 
 | 178 |  | 
 | 179 |     def test_charmapencode(self): | 
 | 180 |         # For charmap encodings the replacement string will be | 
 | 181 |         # mapped through the encoding again. This means, that | 
 | 182 |         # to be able to use e.g. the "replace" handler, the | 
 | 183 |         # charmap has to have a mapping for "?". | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 184 |         charmap = dict((ord(c), bytes(2*c.upper(), 'ascii')) for c in "abcdefgh") | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 185 |         sin = "abc" | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 186 |         sout = b"AABBCC" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 187 |         self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout) | 
 | 188 |  | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 189 |         sin = "abcA" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 190 |         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap) | 
 | 191 |  | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 192 |         charmap[ord("?")] = b"XYZ" | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 193 |         sin = "abcDEF" | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 194 |         sout = b"AABBCCXYZXYZXYZ" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 195 |         self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout) | 
 | 196 |  | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 197 |         charmap[ord("?")] = "XYZ" # wrong type in mapping | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 198 |         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) | 
 | 199 |  | 
| Walter Dörwald | a47d1c0 | 2005-08-30 10:23:14 +0000 | [diff] [blame] | 200 |     def test_decodeunicodeinternal(self): | 
 | 201 |         self.assertRaises( | 
 | 202 |             UnicodeDecodeError, | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 203 |             b"\x00\x00\x00\x00\x00".decode, | 
| Walter Dörwald | a47d1c0 | 2005-08-30 10:23:14 +0000 | [diff] [blame] | 204 |             "unicode-internal", | 
 | 205 |         ) | 
 | 206 |         if sys.maxunicode > 0xffff: | 
 | 207 |             def handler_unicodeinternal(exc): | 
 | 208 |                 if not isinstance(exc, UnicodeDecodeError): | 
 | 209 |                     raise TypeError("don't know how to handle %r" % exc) | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 210 |                 return ("\x01", 1) | 
| Walter Dörwald | a47d1c0 | 2005-08-30 10:23:14 +0000 | [diff] [blame] | 211 |  | 
 | 212 |             self.assertEqual( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 213 |                 b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 214 |                 "\u0000" | 
| Walter Dörwald | a47d1c0 | 2005-08-30 10:23:14 +0000 | [diff] [blame] | 215 |             ) | 
 | 216 |  | 
 | 217 |             self.assertEqual( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 218 |                 b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 219 |                 "\u0000\ufffd" | 
| Walter Dörwald | a47d1c0 | 2005-08-30 10:23:14 +0000 | [diff] [blame] | 220 |             ) | 
 | 221 |  | 
 | 222 |             codecs.register_error("test.hui", handler_unicodeinternal) | 
 | 223 |  | 
 | 224 |             self.assertEqual( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 225 |                 b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 226 |                 "\u0000\u0001\u0000" | 
| Walter Dörwald | a47d1c0 | 2005-08-30 10:23:14 +0000 | [diff] [blame] | 227 |             ) | 
 | 228 |  | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 229 |     def test_callbacks(self): | 
 | 230 |         def handler1(exc): | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 231 |             r = range(exc.start, exc.end) | 
 | 232 |             if isinstance(exc, UnicodeEncodeError): | 
 | 233 |                 l = ["<%d>" % ord(exc.object[pos]) for pos in r] | 
 | 234 |             elif isinstance(exc, UnicodeDecodeError): | 
 | 235 |                 l = ["<%d>" % exc.object[pos] for pos in r] | 
 | 236 |             else: | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 237 |                 raise TypeError("don't know how to handle %r" % exc) | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 238 |             return ("[%s]" % "".join(l), exc.end) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 239 |  | 
 | 240 |         codecs.register_error("test.handler1", handler1) | 
 | 241 |  | 
 | 242 |         def handler2(exc): | 
 | 243 |             if not isinstance(exc, UnicodeDecodeError): | 
 | 244 |                 raise TypeError("don't know how to handle %r" % exc) | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 245 |             l = ["<%d>" % exc.object[pos] for pos in range(exc.start, exc.end)] | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 246 |             return ("[%s]" % "".join(l), exc.end+1) # skip one character | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 247 |  | 
 | 248 |         codecs.register_error("test.handler2", handler2) | 
 | 249 |  | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 250 |         s = b"\x00\x81\x7f\x80\xff" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 251 |  | 
 | 252 |         self.assertEqual( | 
 | 253 |             s.decode("ascii", "test.handler1"), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 254 |             "\x00[<129>]\x7f[<128>][<255>]" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 255 |         ) | 
 | 256 |         self.assertEqual( | 
 | 257 |             s.decode("ascii", "test.handler2"), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 258 |             "\x00[<129>][<128>]" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 259 |         ) | 
 | 260 |  | 
 | 261 |         self.assertEqual( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 262 |             b"\\u3042\u3xxx".decode("unicode-escape", "test.handler1"), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 263 |             "\u3042[<92><117><51><120>]xx" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 264 |         ) | 
 | 265 |  | 
 | 266 |         self.assertEqual( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 267 |             b"\\u3042\u3xx".decode("unicode-escape", "test.handler1"), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 268 |             "\u3042[<92><117><51><120><120>]" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 269 |         ) | 
 | 270 |  | 
 | 271 |         self.assertEqual( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 272 |             codecs.charmap_decode(b"abc", "test.handler1", {ord("a"): "z"})[0], | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 273 |             "z[<98>][<99>]" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 274 |         ) | 
 | 275 |  | 
 | 276 |         self.assertEqual( | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 277 |             "g\xfc\xdfrk".encode("ascii", "test.handler1"), | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 278 |             b"g[<252><223>]rk" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 279 |         ) | 
 | 280 |  | 
 | 281 |         self.assertEqual( | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 282 |             "g\xfc\xdf".encode("ascii", "test.handler1"), | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 283 |             b"g[<252><223>]" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 284 |         ) | 
 | 285 |  | 
 | 286 |     def test_longstrings(self): | 
 | 287 |         # test long strings to check for memory overflow problems | 
| Walter Dörwald | 41980ca | 2007-08-16 21:55:45 +0000 | [diff] [blame] | 288 |         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", | 
 | 289 |                    "backslashreplace"] | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 290 |         # register the handlers under different names, | 
 | 291 |         # to prevent the codec from recognizing the name | 
 | 292 |         for err in errors: | 
 | 293 |             codecs.register_error("test." + err, codecs.lookup_error(err)) | 
 | 294 |         l = 1000 | 
 | 295 |         errors += [ "test." + err for err in errors ] | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 296 |         for uni in [ s*l for s in ("x", "\u3042", "a\xe4") ]: | 
| Walter Dörwald | 41980ca | 2007-08-16 21:55:45 +0000 | [diff] [blame] | 297 |             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", | 
 | 298 |                         "utf-8", "utf-7", "utf-16", "utf-32"): | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 299 |                 for err in errors: | 
| Tim Peters | 3de7526 | 2002-11-09 05:26:15 +0000 | [diff] [blame] | 300 |                     try: | 
 | 301 |                         uni.encode(enc, err) | 
 | 302 |                     except UnicodeError: | 
 | 303 |                         pass | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 304 |  | 
 | 305 |     def check_exceptionobjectargs(self, exctype, args, msg): | 
 | 306 |         # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion | 
 | 307 |         # check with one missing argument | 
 | 308 |         self.assertRaises(TypeError, exctype, *args[:-1]) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 309 |         # check with one argument too much | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 310 |         self.assertRaises(TypeError, exctype, *(args + ["too much"])) | 
 | 311 |         # check with one argument of the wrong type | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 312 |         wrongargs = [ "spam", b"eggs", b"spam", 42, 1.0, None ] | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 313 |         for i in range(len(args)): | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 314 |             for wrongarg in wrongargs: | 
 | 315 |                 if type(wrongarg) is type(args[i]): | 
| Tim Peters | 3de7526 | 2002-11-09 05:26:15 +0000 | [diff] [blame] | 316 |                     continue | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 317 |                 # build argument array | 
 | 318 |                 callargs = [] | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 319 |                 for j in range(len(args)): | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 320 |                     if i==j: | 
 | 321 |                         callargs.append(wrongarg) | 
 | 322 |                     else: | 
 | 323 |                         callargs.append(args[i]) | 
 | 324 |                 self.assertRaises(TypeError, exctype, *callargs) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 325 |  | 
 | 326 |         # check with the correct number and type of arguments | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 327 |         exc = exctype(*args) | 
 | 328 |         self.assertEquals(str(exc), msg) | 
 | 329 |  | 
 | 330 |     def test_unicodeencodeerror(self): | 
 | 331 |         self.check_exceptionobjectargs( | 
 | 332 |             UnicodeEncodeError, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 333 |             ["ascii", "g\xfcrk", 1, 2, "ouch"], | 
| Walter Dörwald | 32a4c71 | 2007-06-20 09:25:34 +0000 | [diff] [blame] | 334 |             "'ascii' codec can't encode character '\\xfc' in position 1: ouch" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 335 |         ) | 
 | 336 |         self.check_exceptionobjectargs( | 
 | 337 |             UnicodeEncodeError, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 338 |             ["ascii", "g\xfcrk", 1, 4, "ouch"], | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 339 |             "'ascii' codec can't encode characters in position 1-3: ouch" | 
 | 340 |         ) | 
 | 341 |         self.check_exceptionobjectargs( | 
 | 342 |             UnicodeEncodeError, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 343 |             ["ascii", "\xfcx", 0, 1, "ouch"], | 
| Walter Dörwald | 32a4c71 | 2007-06-20 09:25:34 +0000 | [diff] [blame] | 344 |             "'ascii' codec can't encode character '\\xfc' in position 0: ouch" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 345 |         ) | 
| Walter Dörwald | fd196bd | 2003-08-12 17:32:43 +0000 | [diff] [blame] | 346 |         self.check_exceptionobjectargs( | 
 | 347 |             UnicodeEncodeError, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 348 |             ["ascii", "\u0100x", 0, 1, "ouch"], | 
| Walter Dörwald | 32a4c71 | 2007-06-20 09:25:34 +0000 | [diff] [blame] | 349 |             "'ascii' codec can't encode character '\\u0100' in position 0: ouch" | 
| Walter Dörwald | fd196bd | 2003-08-12 17:32:43 +0000 | [diff] [blame] | 350 |         ) | 
 | 351 |         self.check_exceptionobjectargs( | 
 | 352 |             UnicodeEncodeError, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 353 |             ["ascii", "\uffffx", 0, 1, "ouch"], | 
| Walter Dörwald | 32a4c71 | 2007-06-20 09:25:34 +0000 | [diff] [blame] | 354 |             "'ascii' codec can't encode character '\\uffff' in position 0: ouch" | 
| Walter Dörwald | fd196bd | 2003-08-12 17:32:43 +0000 | [diff] [blame] | 355 |         ) | 
 | 356 |         if sys.maxunicode > 0xffff: | 
 | 357 |             self.check_exceptionobjectargs( | 
 | 358 |                 UnicodeEncodeError, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 359 |                 ["ascii", "\U00010000x", 0, 1, "ouch"], | 
| Walter Dörwald | 32a4c71 | 2007-06-20 09:25:34 +0000 | [diff] [blame] | 360 |                 "'ascii' codec can't encode character '\\U00010000' in position 0: ouch" | 
| Walter Dörwald | fd196bd | 2003-08-12 17:32:43 +0000 | [diff] [blame] | 361 |             ) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 362 |  | 
 | 363 |     def test_unicodedecodeerror(self): | 
 | 364 |         self.check_exceptionobjectargs( | 
 | 365 |             UnicodeDecodeError, | 
| Guido van Rossum | 254348e | 2007-11-21 19:29:53 +0000 | [diff] [blame] | 366 |             ["ascii", bytearray(b"g\xfcrk"), 1, 2, "ouch"], | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 367 |             "'ascii' codec can't decode byte 0xfc in position 1: ouch" | 
 | 368 |         ) | 
 | 369 |         self.check_exceptionobjectargs( | 
 | 370 |             UnicodeDecodeError, | 
| Guido van Rossum | 254348e | 2007-11-21 19:29:53 +0000 | [diff] [blame] | 371 |             ["ascii", bytearray(b"g\xfcrk"), 1, 3, "ouch"], | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 372 |             "'ascii' codec can't decode bytes in position 1-2: ouch" | 
 | 373 |         ) | 
 | 374 |  | 
 | 375 |     def test_unicodetranslateerror(self): | 
 | 376 |         self.check_exceptionobjectargs( | 
 | 377 |             UnicodeTranslateError, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 378 |             ["g\xfcrk", 1, 2, "ouch"], | 
| Walter Dörwald | 32a4c71 | 2007-06-20 09:25:34 +0000 | [diff] [blame] | 379 |             "can't translate character '\\xfc' in position 1: ouch" | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 380 |         ) | 
 | 381 |         self.check_exceptionobjectargs( | 
 | 382 |             UnicodeTranslateError, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 383 |             ["g\u0100rk", 1, 2, "ouch"], | 
| Walter Dörwald | 32a4c71 | 2007-06-20 09:25:34 +0000 | [diff] [blame] | 384 |             "can't translate character '\\u0100' in position 1: ouch" | 
| Walter Dörwald | fd196bd | 2003-08-12 17:32:43 +0000 | [diff] [blame] | 385 |         ) | 
 | 386 |         self.check_exceptionobjectargs( | 
 | 387 |             UnicodeTranslateError, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 388 |             ["g\uffffrk", 1, 2, "ouch"], | 
| Walter Dörwald | 32a4c71 | 2007-06-20 09:25:34 +0000 | [diff] [blame] | 389 |             "can't translate character '\\uffff' in position 1: ouch" | 
| Walter Dörwald | fd196bd | 2003-08-12 17:32:43 +0000 | [diff] [blame] | 390 |         ) | 
 | 391 |         if sys.maxunicode > 0xffff: | 
 | 392 |             self.check_exceptionobjectargs( | 
 | 393 |                 UnicodeTranslateError, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 394 |                 ["g\U00010000rk", 1, 2, "ouch"], | 
| Walter Dörwald | 32a4c71 | 2007-06-20 09:25:34 +0000 | [diff] [blame] | 395 |                 "can't translate character '\\U00010000' in position 1: ouch" | 
| Walter Dörwald | fd196bd | 2003-08-12 17:32:43 +0000 | [diff] [blame] | 396 |             ) | 
 | 397 |         self.check_exceptionobjectargs( | 
 | 398 |             UnicodeTranslateError, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 399 |             ["g\xfcrk", 1, 3, "ouch"], | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 400 |             "can't translate characters in position 1-2: ouch" | 
 | 401 |         ) | 
 | 402 |  | 
 | 403 |     def test_badandgoodstrictexceptions(self): | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 404 |         # "strict" complains about a non-exception passed in | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 405 |         self.assertRaises( | 
 | 406 |             TypeError, | 
 | 407 |             codecs.strict_errors, | 
 | 408 |             42 | 
 | 409 |         ) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 410 |         # "strict" complains about the wrong exception type | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 411 |         self.assertRaises( | 
 | 412 |             Exception, | 
 | 413 |             codecs.strict_errors, | 
 | 414 |             Exception("ouch") | 
 | 415 |         ) | 
 | 416 |  | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 417 |         # If the correct exception is passed in, "strict" raises it | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 418 |         self.assertRaises( | 
 | 419 |             UnicodeEncodeError, | 
 | 420 |             codecs.strict_errors, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 421 |             UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch") | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 422 |         ) | 
 | 423 |  | 
 | 424 |     def test_badandgoodignoreexceptions(self): | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 425 |         # "ignore" complains about a non-exception passed in | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 426 |         self.assertRaises( | 
 | 427 |            TypeError, | 
 | 428 |            codecs.ignore_errors, | 
 | 429 |            42 | 
 | 430 |         ) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 431 |         # "ignore" complains about the wrong exception type | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 432 |         self.assertRaises( | 
 | 433 |            TypeError, | 
 | 434 |            codecs.ignore_errors, | 
 | 435 |            UnicodeError("ouch") | 
 | 436 |         ) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 437 |         # If the correct exception is passed in, "ignore" returns an empty replacement | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 438 |         self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 439 |             codecs.ignore_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 440 |                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 441 |             ("", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 442 |         ) | 
 | 443 |         self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 444 |             codecs.ignore_errors( | 
| Guido van Rossum | 254348e | 2007-11-21 19:29:53 +0000 | [diff] [blame] | 445 |                 UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 446 |             ("", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 447 |         ) | 
 | 448 |         self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 449 |             codecs.ignore_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 450 |                 UnicodeTranslateError("\u3042", 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 451 |             ("", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 452 |         ) | 
 | 453 |  | 
 | 454 |     def test_badandgoodreplaceexceptions(self): | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 455 |         # "replace" complains about a non-exception passed in | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 456 |         self.assertRaises( | 
 | 457 |            TypeError, | 
 | 458 |            codecs.replace_errors, | 
 | 459 |            42 | 
 | 460 |         ) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 461 |         # "replace" complains about the wrong exception type | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 462 |         self.assertRaises( | 
 | 463 |            TypeError, | 
 | 464 |            codecs.replace_errors, | 
 | 465 |            UnicodeError("ouch") | 
 | 466 |         ) | 
| Walter Dörwald | 690402f | 2005-11-17 18:51:34 +0000 | [diff] [blame] | 467 |         self.assertRaises( | 
| Walter Dörwald | 690402f | 2005-11-17 18:51:34 +0000 | [diff] [blame] | 468 |             TypeError, | 
 | 469 |             codecs.replace_errors, | 
 | 470 |             BadObjectUnicodeEncodeError() | 
 | 471 |         ) | 
 | 472 |         self.assertRaises( | 
| Walter Dörwald | 690402f | 2005-11-17 18:51:34 +0000 | [diff] [blame] | 473 |             TypeError, | 
 | 474 |             codecs.replace_errors, | 
 | 475 |             BadObjectUnicodeDecodeError() | 
 | 476 |         ) | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 477 |         # With the correct exception, "replace" returns an "?" or "\ufffd" replacement | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 478 |         self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 479 |             codecs.replace_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 480 |                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 481 |             ("?", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 482 |         ) | 
 | 483 |         self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 484 |             codecs.replace_errors( | 
| Guido van Rossum | 254348e | 2007-11-21 19:29:53 +0000 | [diff] [blame] | 485 |                 UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 486 |             ("\ufffd", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 487 |         ) | 
 | 488 |         self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 489 |             codecs.replace_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 490 |                 UnicodeTranslateError("\u3042", 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 491 |             ("\ufffd", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 492 |         ) | 
 | 493 |  | 
 | 494 |     def test_badandgoodxmlcharrefreplaceexceptions(self): | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 495 |         # "xmlcharrefreplace" complains about a non-exception passed in | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 496 |         self.assertRaises( | 
 | 497 |            TypeError, | 
 | 498 |            codecs.xmlcharrefreplace_errors, | 
 | 499 |            42 | 
 | 500 |         ) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 501 |         # "xmlcharrefreplace" complains about the wrong exception types | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 502 |         self.assertRaises( | 
 | 503 |            TypeError, | 
 | 504 |            codecs.xmlcharrefreplace_errors, | 
 | 505 |            UnicodeError("ouch") | 
 | 506 |         ) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 507 |         # "xmlcharrefreplace" can only be used for encoding | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 508 |         self.assertRaises( | 
 | 509 |             TypeError, | 
 | 510 |             codecs.xmlcharrefreplace_errors, | 
| Guido van Rossum | 254348e | 2007-11-21 19:29:53 +0000 | [diff] [blame] | 511 |             UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 512 |         ) | 
 | 513 |         self.assertRaises( | 
 | 514 |             TypeError, | 
 | 515 |             codecs.xmlcharrefreplace_errors, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 516 |             UnicodeTranslateError("\u3042", 0, 1, "ouch") | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 517 |         ) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 518 |         # Use the correct exception | 
| Walter Dörwald | 690402f | 2005-11-17 18:51:34 +0000 | [diff] [blame] | 519 |         cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042) | 
| Guido van Rossum | 84fc66d | 2007-05-03 17:18:26 +0000 | [diff] [blame] | 520 |         s = "".join(chr(c) for c in cs) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 521 |         self.assertEquals( | 
| Walter Dörwald | 690402f | 2005-11-17 18:51:34 +0000 | [diff] [blame] | 522 |             codecs.xmlcharrefreplace_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 523 |                 UnicodeEncodeError("ascii", s, 0, len(s), "ouch") | 
| Walter Dörwald | 690402f | 2005-11-17 18:51:34 +0000 | [diff] [blame] | 524 |             ), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 525 |             ("".join("&#%d;" % ord(c) for c in s), len(s)) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 526 |         ) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 527 |  | 
 | 528 |     def test_badandgoodbackslashreplaceexceptions(self): | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 529 |         # "backslashreplace" complains about a non-exception passed in | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 530 |         self.assertRaises( | 
 | 531 |            TypeError, | 
 | 532 |            codecs.backslashreplace_errors, | 
 | 533 |            42 | 
 | 534 |         ) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 535 |         # "backslashreplace" complains about the wrong exception types | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 536 |         self.assertRaises( | 
 | 537 |            TypeError, | 
 | 538 |            codecs.backslashreplace_errors, | 
 | 539 |            UnicodeError("ouch") | 
 | 540 |         ) | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 541 |         # "backslashreplace" can only be used for encoding | 
 | 542 |         self.assertRaises( | 
 | 543 |             TypeError, | 
 | 544 |             codecs.backslashreplace_errors, | 
| Guido van Rossum | 254348e | 2007-11-21 19:29:53 +0000 | [diff] [blame] | 545 |             UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 546 |         ) | 
 | 547 |         self.assertRaises( | 
 | 548 |             TypeError, | 
 | 549 |             codecs.backslashreplace_errors, | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 550 |             UnicodeTranslateError("\u3042", 0, 1, "ouch") | 
| Walter Dörwald | ea4250d | 2003-01-20 02:34:07 +0000 | [diff] [blame] | 551 |         ) | 
 | 552 |         # Use the correct exception | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 553 |         self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 554 |             codecs.backslashreplace_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 555 |                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 556 |             ("\\u3042", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 557 |         ) | 
 | 558 |         self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 559 |             codecs.backslashreplace_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 560 |                 UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 561 |             ("\\x00", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 562 |         ) | 
 | 563 |         self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 564 |             codecs.backslashreplace_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 565 |                 UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 566 |             ("\\xff", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 567 |         ) | 
 | 568 |         self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 569 |             codecs.backslashreplace_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 570 |                 UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 571 |             ("\\u0100", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 572 |         ) | 
 | 573 |         self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 574 |             codecs.backslashreplace_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 575 |                 UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 576 |             ("\\uffff", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 577 |         ) | 
 | 578 |         if sys.maxunicode>0xffff: | 
 | 579 |             self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 580 |                 codecs.backslashreplace_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 581 |                     UnicodeEncodeError("ascii", "\U00010000", 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 582 |                 ("\\U00010000", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 583 |             ) | 
 | 584 |             self.assertEquals( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 585 |                 codecs.backslashreplace_errors( | 
| Walter Dörwald | d203431 | 2007-05-18 16:29:38 +0000 | [diff] [blame] | 586 |                     UnicodeEncodeError("ascii", "\U0010ffff", 0, 1, "ouch")), | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 587 |                 ("\\U0010ffff", 1) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 588 |             ) | 
 | 589 |  | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 590 |     def test_badhandlerresults(self): | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 591 |         results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 592 |         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15") | 
 | 593 |  | 
 | 594 |         for res in results: | 
| Benjamin Peterson | b58dda7 | 2009-01-18 22:27:04 +0000 | [diff] [blame] | 595 |             codecs.register_error("test.badhandler", lambda x: res) | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 596 |             for enc in encs: | 
 | 597 |                 self.assertRaises( | 
 | 598 |                     TypeError, | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 599 |                     "\u3042".encode, | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 600 |                     enc, | 
 | 601 |                     "test.badhandler" | 
 | 602 |                 ) | 
 | 603 |             for (enc, bytes) in ( | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 604 |                 ("ascii", b"\xff"), | 
 | 605 |                 ("utf-8", b"\xff"), | 
 | 606 |                 ("utf-7", b"+x-"), | 
 | 607 |                 ("unicode-internal", b"\x00"), | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 608 |             ): | 
 | 609 |                 self.assertRaises( | 
 | 610 |                     TypeError, | 
 | 611 |                     bytes.decode, | 
 | 612 |                     enc, | 
 | 613 |                     "test.badhandler" | 
 | 614 |                 ) | 
 | 615 |  | 
 | 616 |     def test_lookup(self): | 
 | 617 |         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict")) | 
 | 618 |         self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore")) | 
 | 619 |         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict")) | 
 | 620 |         self.assertEquals( | 
 | 621 |             codecs.xmlcharrefreplace_errors, | 
 | 622 |             codecs.lookup_error("xmlcharrefreplace") | 
 | 623 |         ) | 
 | 624 |         self.assertEquals( | 
 | 625 |             codecs.backslashreplace_errors, | 
 | 626 |             codecs.lookup_error("backslashreplace") | 
 | 627 |         ) | 
 | 628 |  | 
| Walter Dörwald | 9ab7dd4 | 2002-09-06 17:21:40 +0000 | [diff] [blame] | 629 |     def test_unencodablereplacement(self): | 
 | 630 |         def unencrepl(exc): | 
 | 631 |             if isinstance(exc, UnicodeEncodeError): | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 632 |                 return ("\u4242", exc.end) | 
| Walter Dörwald | 9ab7dd4 | 2002-09-06 17:21:40 +0000 | [diff] [blame] | 633 |             else: | 
 | 634 |                 raise TypeError("don't know how to handle %r" % exc) | 
 | 635 |         codecs.register_error("test.unencreplhandler", unencrepl) | 
 | 636 |         for enc in ("ascii", "iso-8859-1", "iso-8859-15"): | 
 | 637 |             self.assertRaises( | 
 | 638 |                 UnicodeEncodeError, | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 639 |                 "\u4242".encode, | 
| Walter Dörwald | 9ab7dd4 | 2002-09-06 17:21:40 +0000 | [diff] [blame] | 640 |                 enc, | 
 | 641 |                 "test.unencreplhandler" | 
 | 642 |             ) | 
 | 643 |  | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 644 |     def test_badregistercall(self): | 
 | 645 |         # enhance coverage of: | 
 | 646 |         # Modules/_codecsmodule.c::register_error() | 
 | 647 |         # Python/codecs.c::PyCodec_RegisterError() | 
 | 648 |         self.assertRaises(TypeError, codecs.register_error, 42) | 
 | 649 |         self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42) | 
 | 650 |  | 
| Walter Dörwald | e22d339 | 2005-11-17 08:52:34 +0000 | [diff] [blame] | 651 |     def test_badlookupcall(self): | 
 | 652 |         # enhance coverage of: | 
 | 653 |         # Modules/_codecsmodule.c::lookup_error() | 
 | 654 |         self.assertRaises(TypeError, codecs.lookup_error) | 
 | 655 |  | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 656 |     def test_unknownhandler(self): | 
 | 657 |         # enhance coverage of: | 
 | 658 |         # Modules/_codecsmodule.c::lookup_error() | 
 | 659 |         self.assertRaises(LookupError, codecs.lookup_error, "test.unknown") | 
 | 660 |  | 
 | 661 |     def test_xmlcharrefvalues(self): | 
 | 662 |         # enhance coverage of: | 
 | 663 |         # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors() | 
 | 664 |         # and inline implementations | 
 | 665 |         v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000) | 
| Walter Dörwald | 0cb27dd | 2003-01-09 11:38:50 +0000 | [diff] [blame] | 666 |         if sys.maxunicode>=100000: | 
| Tim Peters | f2715e0 | 2003-02-19 02:35:07 +0000 | [diff] [blame] | 667 |             v += (100000, 500000, 1000000) | 
| Guido van Rossum | 84fc66d | 2007-05-03 17:18:26 +0000 | [diff] [blame] | 668 |         s = "".join([chr(x) for x in v]) | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 669 |         codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors) | 
 | 670 |         for enc in ("ascii", "iso-8859-15"): | 
 | 671 |             for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"): | 
 | 672 |                 s.encode(enc, err) | 
 | 673 |  | 
 | 674 |     def test_decodehelper(self): | 
 | 675 |         # enhance coverage of: | 
 | 676 |         # Objects/unicodeobject.c::unicode_decode_call_errorhandler() | 
 | 677 |         # and callers | 
| Guido van Rossum | 09549f4 | 2007-08-27 20:40:10 +0000 | [diff] [blame] | 678 |         self.assertRaises(LookupError, b"\xff".decode, "ascii", "test.unknown") | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 679 |  | 
 | 680 |         def baddecodereturn1(exc): | 
 | 681 |             return 42 | 
 | 682 |         codecs.register_error("test.baddecodereturn1", baddecodereturn1) | 
| Guido van Rossum | 09549f4 | 2007-08-27 20:40:10 +0000 | [diff] [blame] | 683 |         self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn1") | 
 | 684 |         self.assertRaises(TypeError, b"\\".decode, "unicode-escape", "test.baddecodereturn1") | 
 | 685 |         self.assertRaises(TypeError, b"\\x0".decode, "unicode-escape", "test.baddecodereturn1") | 
 | 686 |         self.assertRaises(TypeError, b"\\x0y".decode, "unicode-escape", "test.baddecodereturn1") | 
 | 687 |         self.assertRaises(TypeError, b"\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1") | 
 | 688 |         self.assertRaises(TypeError, b"\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1") | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 689 |  | 
 | 690 |         def baddecodereturn2(exc): | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 691 |             return ("?", None) | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 692 |         codecs.register_error("test.baddecodereturn2", baddecodereturn2) | 
| Guido van Rossum | 09549f4 | 2007-08-27 20:40:10 +0000 | [diff] [blame] | 693 |         self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn2") | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 694 |  | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 695 |         handler = PosReturn() | 
 | 696 |         codecs.register_error("test.posreturn", handler.handle) | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 697 |  | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 698 |         # Valid negative position | 
 | 699 |         handler.pos = -1 | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 700 |         self.assertEquals(b"\xff0".decode("ascii", "test.posreturn"), "<?>0") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 701 |  | 
 | 702 |         # Valid negative position | 
 | 703 |         handler.pos = -2 | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 704 |         self.assertEquals(b"\xff0".decode("ascii", "test.posreturn"), "<?><?>") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 705 |  | 
 | 706 |         # Negative position out of bounds | 
 | 707 |         handler.pos = -3 | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 708 |         self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 709 |  | 
 | 710 |         # Valid positive position | 
 | 711 |         handler.pos = 1 | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 712 |         self.assertEquals(b"\xff0".decode("ascii", "test.posreturn"), "<?>0") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 713 |  | 
| Walter Dörwald | 29ddfba | 2004-12-14 21:28:07 +0000 | [diff] [blame] | 714 |         # Largest valid positive position (one beyond end of input) | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 715 |         handler.pos = 2 | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 716 |         self.assertEquals(b"\xff0".decode("ascii", "test.posreturn"), "<?>") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 717 |  | 
 | 718 |         # Invalid positive position | 
 | 719 |         handler.pos = 3 | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 720 |         self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 721 |  | 
 | 722 |         # Restart at the "0" | 
 | 723 |         handler.pos = 6 | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 724 |         self.assertEquals(b"\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), "<?>0") | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 725 |  | 
 | 726 |         class D(dict): | 
 | 727 |             def __getitem__(self, key): | 
 | 728 |                 raise ValueError | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 729 |         self.assertRaises(UnicodeError, codecs.charmap_decode, b"\xff", "strict", {0xff: None}) | 
 | 730 |         self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict", D()) | 
 | 731 |         self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: sys.maxunicode+1}) | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 732 |  | 
 | 733 |     def test_encodehelper(self): | 
 | 734 |         # enhance coverage of: | 
 | 735 |         # Objects/unicodeobject.c::unicode_encode_call_errorhandler() | 
 | 736 |         # and callers | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 737 |         self.assertRaises(LookupError, "\xff".encode, "ascii", "test.unknown") | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 738 |  | 
 | 739 |         def badencodereturn1(exc): | 
 | 740 |             return 42 | 
 | 741 |         codecs.register_error("test.badencodereturn1", badencodereturn1) | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 742 |         self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn1") | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 743 |  | 
 | 744 |         def badencodereturn2(exc): | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 745 |             return ("?", None) | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 746 |         codecs.register_error("test.badencodereturn2", badencodereturn2) | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 747 |         self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn2") | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 748 |  | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 749 |         handler = PosReturn() | 
 | 750 |         codecs.register_error("test.posreturn", handler.handle) | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 751 |  | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 752 |         # Valid negative position | 
 | 753 |         handler.pos = -1 | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 754 |         self.assertEquals("\xff0".encode("ascii", "test.posreturn"), b"<?>0") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 755 |  | 
 | 756 |         # Valid negative position | 
 | 757 |         handler.pos = -2 | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 758 |         self.assertEquals("\xff0".encode("ascii", "test.posreturn"), b"<?><?>") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 759 |  | 
 | 760 |         # Negative position out of bounds | 
 | 761 |         handler.pos = -3 | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 762 |         self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 763 |  | 
 | 764 |         # Valid positive position | 
 | 765 |         handler.pos = 1 | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 766 |         self.assertEquals("\xff0".encode("ascii", "test.posreturn"), b"<?>0") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 767 |  | 
 | 768 |         # Largest valid positive position (one beyond end of input) | 
 | 769 |         handler.pos = 2 | 
| Walter Dörwald | 00048f0 | 2007-05-09 10:44:06 +0000 | [diff] [blame] | 770 |         self.assertEquals("\xff0".encode("ascii", "test.posreturn"), b"<?>") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 771 |  | 
 | 772 |         # Invalid positive position | 
 | 773 |         handler.pos = 3 | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 774 |         self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn") | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 775 |  | 
 | 776 |         handler.pos = 0 | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 777 |  | 
 | 778 |         class D(dict): | 
 | 779 |             def __getitem__(self, key): | 
 | 780 |                 raise ValueError | 
| Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 781 |         for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"): | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 782 |             self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None}) | 
 | 783 |             self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D()) | 
 | 784 |             self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300}) | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 785 |  | 
 | 786 |     def test_translatehelper(self): | 
 | 787 |         # enhance coverage of: | 
 | 788 |         # Objects/unicodeobject.c::unicode_encode_call_errorhandler() | 
 | 789 |         # and callers | 
 | 790 |         # (Unfortunately the errors argument is not directly accessible | 
 | 791 |         # from Python, so we can't test that much) | 
 | 792 |         class D(dict): | 
 | 793 |             def __getitem__(self, key): | 
 | 794 |                 raise ValueError | 
| Georg Brandl | edbcc13 | 2007-10-24 21:25:34 +0000 | [diff] [blame] | 795 |         #self.assertRaises(ValueError, "\xff".translate, D()) | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 796 |         self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1}) | 
 | 797 |         self.assertRaises(TypeError, "\xff".translate, {0xff: ()}) | 
| Walter Dörwald | 30537a4 | 2003-01-08 23:22:13 +0000 | [diff] [blame] | 798 |  | 
| Walter Dörwald | 4894c30 | 2003-10-24 14:25:28 +0000 | [diff] [blame] | 799 |     def test_bug828737(self): | 
 | 800 |         charmap = { | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 801 |             ord("&"): "&", | 
 | 802 |             ord("<"): "<", | 
 | 803 |             ord(">"): ">", | 
 | 804 |             ord('"'): """, | 
| Walter Dörwald | 4894c30 | 2003-10-24 14:25:28 +0000 | [diff] [blame] | 805 |         } | 
| Tim Peters | 58eb11c | 2004-01-18 20:29:55 +0000 | [diff] [blame] | 806 |  | 
| Walter Dörwald | 4894c30 | 2003-10-24 14:25:28 +0000 | [diff] [blame] | 807 |         for n in (1, 10, 100, 1000): | 
| Guido van Rossum | ef87d6e | 2007-05-02 19:09:54 +0000 | [diff] [blame] | 808 |             text = 'abc<def>ghi'*n | 
| Walter Dörwald | 4894c30 | 2003-10-24 14:25:28 +0000 | [diff] [blame] | 809 |             text.translate(charmap) | 
 | 810 |  | 
| Walter Dörwald | e78178e | 2007-07-30 13:31:40 +0000 | [diff] [blame] | 811 |     def test_mutatingdecodehandler(self): | 
 | 812 |         baddata = [ | 
 | 813 |             ("ascii", b"\xff"), | 
 | 814 |             ("utf-7", b"++"), | 
 | 815 |             ("utf-8",  b"\xff"), | 
 | 816 |             ("utf-16", b"\xff"), | 
| Walter Dörwald | 41980ca | 2007-08-16 21:55:45 +0000 | [diff] [blame] | 817 |             ("utf-32", b"\xff"), | 
| Walter Dörwald | e78178e | 2007-07-30 13:31:40 +0000 | [diff] [blame] | 818 |             ("unicode-escape", b"\\u123g"), | 
 | 819 |             ("raw-unicode-escape", b"\\u123g"), | 
 | 820 |             ("unicode-internal", b"\xff"), | 
 | 821 |         ] | 
 | 822 |  | 
 | 823 |         def replacing(exc): | 
 | 824 |             if isinstance(exc, UnicodeDecodeError): | 
 | 825 |                 exc.object = 42 | 
 | 826 |                 return ("\u4242", 0) | 
 | 827 |             else: | 
 | 828 |                 raise TypeError("don't know how to handle %r" % exc) | 
 | 829 |         codecs.register_error("test.replacing", replacing) | 
 | 830 |         for (encoding, data) in baddata: | 
 | 831 |             self.assertRaises(TypeError, data.decode, encoding, "test.replacing") | 
 | 832 |  | 
 | 833 |         def mutating(exc): | 
 | 834 |             if isinstance(exc, UnicodeDecodeError): | 
 | 835 |                 exc.object[:] = b"" | 
 | 836 |                 return ("\u4242", 0) | 
 | 837 |             else: | 
 | 838 |                 raise TypeError("don't know how to handle %r" % exc) | 
 | 839 |         codecs.register_error("test.mutating", mutating) | 
 | 840 |         # If the decoder doesn't pick up the modified input the following | 
 | 841 |         # will lead to an endless loop | 
 | 842 |         for (encoding, data) in baddata: | 
 | 843 |             self.assertRaises(TypeError, data.decode, encoding, "test.replacing") | 
 | 844 |  | 
def test_main():
    """Run the CodecCallbackTest cases via test.support.run_unittest()."""
    case_classes = (CodecCallbackTest,)
    test.support.run_unittest(*case_classes)
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 847 |  | 
 | 848 | if __name__ == "__main__": | 
 | 849 |     test_main() |