| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 1 | """ Test script for the Unicode implementation. | 
 | 2 |  | 
| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 3 | Written by Marc-Andre Lemburg (mal@lemburg.com). | 
 | 4 |  | 
 | 5 | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. | 
 | 6 |  | 
 | 7 | """ | 
 | 8 | from test_support import verbose | 
 | 9 | import sys | 
 | 10 |  | 
 | 11 | def test(method, input, output, *args): | 
 | 12 |     if verbose: | 
 | 13 |         print '%s.%s%s =? %s... ' % (repr(input), method, args, output), | 
 | 14 |     try: | 
 | 15 |         f = getattr(input, method) | 
 | 16 |         value = apply(f, args) | 
 | 17 |     except: | 
 | 18 |         value = sys.exc_type | 
| Guido van Rossum | 6650320 | 2000-04-28 20:39:58 +0000 | [diff] [blame] | 19 |         exc = sys.exc_info()[:2] | 
| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 20 |     else: | 
 | 21 |         exc = None | 
 | 22 |     if value != output: | 
 | 23 |         if verbose: | 
 | 24 |             print 'no' | 
 | 25 |         print '*',f, `input`, `output`, `value` | 
 | 26 |         if exc: | 
| Guido van Rossum | 6650320 | 2000-04-28 20:39:58 +0000 | [diff] [blame] | 27 |             print '  value == %s: %s' % (exc) | 
| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 28 |     else: | 
 | 29 |         if verbose: | 
 | 30 |             print 'yes' | 
 | 31 |  | 
 | 32 | test('capitalize', u' hello ', u' hello ') | 
 | 33 | test('capitalize', u'hello ', u'Hello ') | 
 | 34 |  | 
 | 35 | test('title', u' hello ', u' Hello ') | 
 | 36 | test('title', u'hello ', u'Hello ') | 
 | 37 | test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String') | 
 | 38 | test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String') | 
 | 39 | test('title', u"getInt", u'Getint') | 
 | 40 |  | 
 | 41 | test('find', u'abcdefghiabc', 0, u'abc') | 
 | 42 | test('find', u'abcdefghiabc', 9, u'abc', 1) | 
 | 43 | test('find', u'abcdefghiabc', -1, u'def', 4) | 
 | 44 |  | 
 | 45 | test('rfind', u'abcdefghiabc', 9, u'abc') | 
 | 46 |  | 
 | 47 | test('lower', u'HeLLo', u'hello') | 
 | 48 | test('lower', u'hello', u'hello') | 
 | 49 |  | 
 | 50 | test('upper', u'HeLLo', u'HELLO') | 
 | 51 | test('upper', u'HELLO', u'HELLO') | 
 | 52 |  | 
 | 53 | if 0: | 
 | 54 |     transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' | 
 | 55 |  | 
 | 56 |     test('maketrans', u'abc', transtable, u'xyz') | 
 | 57 |     test('maketrans', u'abc', ValueError, u'xyzq') | 
 | 58 |  | 
 | 59 | test('split', u'this is the split function', | 
 | 60 |      [u'this', u'is', u'the', u'split', u'function']) | 
 | 61 | test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|') | 
 | 62 | test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2) | 
 | 63 | test('split', u'a b c d', [u'a', u'b c d'], None, 1) | 
 | 64 | test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2) | 
 | 65 | test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3) | 
 | 66 | test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4) | 
 | 67 | test('split', u'a b c d', [u'a b c d'], None, 0) | 
 | 68 | test('split', u'a  b  c  d', [u'a', u'b', u'c  d'], None, 2) | 
 | 69 | test('split', u'a b c d ', [u'a', u'b', u'c', u'd']) | 
 | 70 |  | 
 | 71 | # join now works with any sequence type | 
 | 72 | class Sequence: | 
 | 73 |     def __init__(self): self.seq = 'wxyz' | 
 | 74 |     def __len__(self): return len(self.seq) | 
 | 75 |     def __getitem__(self, i): return self.seq[i] | 
 | 76 |  | 
 | 77 | test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd']) | 
 | 78 | test('join', u'', u'abcd', (u'a', u'b', u'c', u'd')) | 
 | 79 | test('join', u' ', u'w x y z', Sequence()) | 
 | 80 | test('join', u' ', TypeError, 7) | 
 | 81 |  | 
 | 82 | class BadSeq(Sequence): | 
 | 83 |     def __init__(self): self.seq = [7, u'hello', 123L] | 
 | 84 |  | 
 | 85 | test('join', u' ', TypeError, BadSeq()) | 
 | 86 |  | 
 | 87 | result = u'' | 
 | 88 | for i in range(10): | 
 | 89 |     if i > 0: | 
 | 90 |         result = result + u':' | 
 | 91 |     result = result + u'x'*10 | 
 | 92 | test('join', u':', result, [u'x' * 10] * 10) | 
 | 93 | test('join', u':', result, (u'x' * 10,) * 10) | 
 | 94 |  | 
 | 95 | test('strip', u'   hello   ', u'hello') | 
 | 96 | test('lstrip', u'   hello   ', u'hello   ') | 
 | 97 | test('rstrip', u'   hello   ', u'   hello') | 
 | 98 | test('strip', u'hello', u'hello') | 
 | 99 |  | 
 | 100 | test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS') | 
 | 101 |  | 
 | 102 | if 0: | 
 | 103 |     test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def') | 
 | 104 |  | 
 | 105 |     table = string.maketrans('a', u'A') | 
 | 106 |     test('translate', u'abc', u'Abc', table) | 
 | 107 |     test('translate', u'xyz', u'xyz', table) | 
 | 108 |  | 
 | 109 | test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1) | 
| Barry Warsaw | 51ac580 | 2000-03-20 16:36:48 +0000 | [diff] [blame] | 110 | test('replace', u'one!two!three!', u'onetwothree', '!', '') | 
| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 111 | test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2) | 
 | 112 | test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3) | 
 | 113 | test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4) | 
 | 114 | test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0) | 
 | 115 | test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@') | 
 | 116 | test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@') | 
 | 117 | test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2) | 
 | 118 |  | 
 | 119 | test('startswith', u'hello', 1, u'he') | 
 | 120 | test('startswith', u'hello', 1, u'hello') | 
 | 121 | test('startswith', u'hello', 0, u'hello world') | 
 | 122 | test('startswith', u'hello', 1, u'') | 
 | 123 | test('startswith', u'hello', 0, u'ello') | 
 | 124 | test('startswith', u'hello', 1, u'ello', 1) | 
 | 125 | test('startswith', u'hello', 1, u'o', 4) | 
 | 126 | test('startswith', u'hello', 0, u'o', 5) | 
 | 127 | test('startswith', u'hello', 1, u'', 5) | 
 | 128 | test('startswith', u'hello', 0, u'lo', 6) | 
 | 129 | test('startswith', u'helloworld', 1, u'lowo', 3) | 
 | 130 | test('startswith', u'helloworld', 1, u'lowo', 3, 7) | 
 | 131 | test('startswith', u'helloworld', 0, u'lowo', 3, 6) | 
 | 132 |  | 
 | 133 | test('endswith', u'hello', 1, u'lo') | 
 | 134 | test('endswith', u'hello', 0, u'he') | 
 | 135 | test('endswith', u'hello', 1, u'') | 
 | 136 | test('endswith', u'hello', 0, u'hello world') | 
 | 137 | test('endswith', u'helloworld', 0, u'worl') | 
 | 138 | test('endswith', u'helloworld', 1, u'worl', 3, 9) | 
 | 139 | test('endswith', u'helloworld', 1, u'world', 3, 12) | 
 | 140 | test('endswith', u'helloworld', 1, u'lowo', 1, 7) | 
 | 141 | test('endswith', u'helloworld', 1, u'lowo', 2, 7) | 
 | 142 | test('endswith', u'helloworld', 1, u'lowo', 3, 7) | 
 | 143 | test('endswith', u'helloworld', 0, u'lowo', 4, 7) | 
 | 144 | test('endswith', u'helloworld', 0, u'lowo', 3, 8) | 
 | 145 | test('endswith', u'ab', 0, u'ab', 0, 1) | 
 | 146 | test('endswith', u'ab', 0, u'ab', 0, 0) | 
 | 147 |  | 
 | 148 | test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi') | 
 | 149 | test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab      def\ng       hi', 8) | 
 | 150 | test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab  def\ng   hi', 4) | 
 | 151 | test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab  def\ng   hi', 4) | 
 | 152 |  | 
 | 153 | if 0: | 
 | 154 |     test('capwords', u'abc def ghi', u'Abc Def Ghi') | 
 | 155 |     test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi') | 
 | 156 |     test('capwords', u'abc\t   def  \nghi', u'Abc Def Ghi') | 
 | 157 |  | 
 | 158 | # Comparisons: | 
 | 159 | print 'Testing Unicode comparisons...', | 
 | 160 | assert u'abc' == 'abc' | 
 | 161 | assert 'abc' == u'abc' | 
 | 162 | assert u'abc' == u'abc' | 
 | 163 | assert u'abcd' > 'abc' | 
 | 164 | assert 'abcd' > u'abc' | 
 | 165 | assert u'abcd' > u'abc' | 
 | 166 | assert u'abc' < 'abcd' | 
 | 167 | assert 'abc' < u'abcd' | 
 | 168 | assert u'abc' < u'abcd' | 
 | 169 | print 'done.' | 
 | 170 |  | 
| Marc-André Lemburg | d6d06ad | 2000-07-07 17:48:52 +0000 | [diff] [blame] | 171 | print 'Testing UTF-16 code point order comparisons...', | 
 | 172 | #No surrogates, no fixup required. | 
 | 173 | assert u'\u0061' < u'\u20ac' | 
 | 174 | # Non surrogate below surrogate value, no fixup required | 
 | 175 | assert u'\u0061' < u'\ud800\udc02' | 
 | 176 |  | 
 | 177 | # Non surrogate above surrogate value, fixup required | 
 | 178 | def test_lecmp(s, s2): | 
 | 179 |   assert s <  s2 , "comparison failed on %s < %s" % (s, s2) | 
 | 180 |    | 
 | 181 | def test_fixup(s): | 
 | 182 |   s2 = u'\ud800\udc01' | 
 | 183 |   test_lecmp(s, s2) | 
 | 184 |   s2 = u'\ud900\udc01' | 
 | 185 |   test_lecmp(s, s2) | 
 | 186 |   s2 = u'\uda00\udc01' | 
 | 187 |   test_lecmp(s, s2) | 
 | 188 |   s2 = u'\udb00\udc01' | 
 | 189 |   test_lecmp(s, s2) | 
 | 190 |   s2 = u'\ud800\udd01' | 
 | 191 |   test_lecmp(s, s2) | 
 | 192 |   s2 = u'\ud900\udd01' | 
 | 193 |   test_lecmp(s, s2) | 
 | 194 |   s2 = u'\uda00\udd01' | 
 | 195 |   test_lecmp(s, s2) | 
 | 196 |   s2 = u'\udb00\udd01' | 
 | 197 |   test_lecmp(s, s2) | 
 | 198 |   s2 = u'\ud800\ude01' | 
 | 199 |   test_lecmp(s, s2) | 
 | 200 |   s2 = u'\ud900\ude01' | 
 | 201 |   test_lecmp(s, s2) | 
 | 202 |   s2 = u'\uda00\ude01' | 
 | 203 |   test_lecmp(s, s2) | 
 | 204 |   s2 = u'\udb00\ude01' | 
 | 205 |   test_lecmp(s, s2) | 
 | 206 |   s2 = u'\ud800\udfff' | 
 | 207 |   test_lecmp(s, s2) | 
 | 208 |   s2 = u'\ud900\udfff' | 
 | 209 |   test_lecmp(s, s2) | 
 | 210 |   s2 = u'\uda00\udfff' | 
 | 211 |   test_lecmp(s, s2) | 
 | 212 |   s2 = u'\udb00\udfff' | 
 | 213 |   test_lecmp(s, s2) | 
 | 214 |  | 
 | 215 | test_fixup(u'\ue000') | 
 | 216 | test_fixup(u'\uff61') | 
 | 217 |  | 
 | 218 | # Surrogates on both sides, no fixup required | 
 | 219 | assert u'\ud800\udc02' < u'\ud84d\udc56' | 
 | 220 | print 'done.' | 
 | 221 |  | 
| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 222 | test('ljust', u'abc',  u'abc       ', 10) | 
 | 223 | test('rjust', u'abc',  u'       abc', 10) | 
 | 224 | test('center', u'abc', u'   abc    ', 10) | 
 | 225 | test('ljust', u'abc',  u'abc   ', 6) | 
 | 226 | test('rjust', u'abc',  u'   abc', 6) | 
 | 227 | test('center', u'abc', u' abc  ', 6) | 
 | 228 | test('ljust', u'abc', u'abc', 2) | 
 | 229 | test('rjust', u'abc', u'abc', 2) | 
 | 230 | test('center', u'abc', u'abc', 2) | 
 | 231 |  | 
 | 232 | test('islower', u'a', 1) | 
 | 233 | test('islower', u'A', 0) | 
 | 234 | test('islower', u'\n', 0) | 
 | 235 | test('islower', u'\u1FFc', 0) | 
 | 236 | test('islower', u'abc', 1) | 
 | 237 | test('islower', u'aBc', 0) | 
 | 238 | test('islower', u'abc\n', 1) | 
 | 239 |  | 
 | 240 | test('isupper', u'a', 0) | 
 | 241 | test('isupper', u'A', 1) | 
 | 242 | test('isupper', u'\n', 0) | 
 | 243 | test('isupper', u'\u1FFc', 0) | 
 | 244 | test('isupper', u'ABC', 1) | 
 | 245 | test('isupper', u'AbC', 0) | 
 | 246 | test('isupper', u'ABC\n', 1) | 
 | 247 |  | 
 | 248 | test('istitle', u'a', 0) | 
 | 249 | test('istitle', u'A', 1) | 
 | 250 | test('istitle', u'\n', 0) | 
 | 251 | test('istitle', u'\u1FFc', 1) | 
 | 252 | test('istitle', u'A Titlecased Line', 1) | 
 | 253 | test('istitle', u'A\nTitlecased Line', 1) | 
 | 254 | test('istitle', u'A Titlecased, Line', 1) | 
 | 255 | test('istitle', u'Greek \u1FFcitlecases ...', 1) | 
 | 256 | test('istitle', u'Not a capitalized String', 0) | 
 | 257 | test('istitle', u'Not\ta Titlecase String', 0) | 
 | 258 | test('istitle', u'Not--a Titlecase String', 0) | 
 | 259 |  | 
| Marc-André Lemburg | 9d46741 | 2000-07-05 09:46:40 +0000 | [diff] [blame] | 260 | test('isalpha', u'a', 1) | 
 | 261 | test('isalpha', u'A', 1) | 
 | 262 | test('isalpha', u'\n', 0) | 
 | 263 | test('isalpha', u'\u1FFc', 1) | 
 | 264 | test('isalpha', u'abc', 1) | 
 | 265 | test('isalpha', u'aBc123', 0) | 
 | 266 | test('isalpha', u'abc\n', 0) | 
 | 267 |  | 
 | 268 | test('isalnum', u'a', 1) | 
 | 269 | test('isalnum', u'A', 1) | 
 | 270 | test('isalnum', u'\n', 0) | 
 | 271 | test('isalnum', u'123abc456', 1) | 
 | 272 | test('isalnum', u'a1b3c', 1) | 
 | 273 | test('isalnum', u'aBc000 ', 0) | 
 | 274 | test('isalnum', u'abc\n', 0) | 
 | 275 |  | 
| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 276 | test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']) | 
 | 277 | test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']) | 
 | 278 | test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']) | 
 | 279 | test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']) | 
 | 280 | test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']) | 
 | 281 | test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']) | 
| Guido van Rossum | 7ee801d | 2000-04-11 15:37:02 +0000 | [diff] [blame] | 282 | test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1) | 
| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 283 |  | 
 | 284 | test('translate', u"abababc", u'bbbc', {ord('a'):None}) | 
 | 285 | test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')}) | 
 | 286 | test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}) | 
 | 287 |  | 
| Guido van Rossum | d4d2684 | 2000-03-13 23:21:48 +0000 | [diff] [blame] | 288 | # Contains: | 
 | 289 | print 'Testing Unicode contains method...', | 
| Guido van Rossum | 9e896b3 | 2000-04-05 20:11:21 +0000 | [diff] [blame] | 290 | assert ('a' in u'abdb') == 1 | 
 | 291 | assert ('a' in u'bdab') == 1 | 
 | 292 | assert ('a' in u'bdaba') == 1 | 
 | 293 | assert ('a' in u'bdba') == 1 | 
| Guido van Rossum | d4d2684 | 2000-03-13 23:21:48 +0000 | [diff] [blame] | 294 | assert ('a' in u'bdba') == 1 | 
 | 295 | assert (u'a' in u'bdba') == 1 | 
 | 296 | assert (u'a' in u'bdb') == 0 | 
 | 297 | assert (u'a' in 'bdb') == 0 | 
 | 298 | assert (u'a' in 'bdba') == 1 | 
| Guido van Rossum | 9e896b3 | 2000-04-05 20:11:21 +0000 | [diff] [blame] | 299 | assert (u'a' in ('a',1,None)) == 1 | 
 | 300 | assert (u'a' in (1,None,'a')) == 1 | 
 | 301 | assert (u'a' in (1,None,u'a')) == 1 | 
 | 302 | assert ('a' in ('a',1,None)) == 1 | 
 | 303 | assert ('a' in (1,None,'a')) == 1 | 
 | 304 | assert ('a' in (1,None,u'a')) == 1 | 
 | 305 | assert ('a' in ('x',1,u'y')) == 0 | 
 | 306 | assert ('a' in ('x',1,None)) == 0 | 
| Guido van Rossum | d4d2684 | 2000-03-13 23:21:48 +0000 | [diff] [blame] | 307 | print 'done.' | 
 | 308 |  | 
| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 309 | # Formatting: | 
 | 310 | print 'Testing Unicode formatting strings...', | 
 | 311 | assert u"%s, %s" % (u"abc", "abc") == u'abc, abc' | 
 | 312 | assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000,  3.00' | 
 | 313 | assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000,  3.00' | 
 | 314 | assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000,  3.50' | 
 | 315 | assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000,  3.57' | 
 | 316 | assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57' | 
| Marc-André Lemburg | 59a044b | 2000-06-08 17:50:55 +0000 | [diff] [blame] | 317 | assert u"%c" % (u"a",) == u'a' | 
 | 318 | assert u"%c" % ("a",) == u'a' | 
| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 319 | assert u"%c" % (34,) == u'"' | 
 | 320 | assert u"%c" % (36,) == u'$' | 
| Marc-André Lemburg | 8462573 | 2000-06-13 12:05:36 +0000 | [diff] [blame] | 321 | value = u"%r, %r" % (u"abc", "abc")  | 
 | 322 | if value != u"u'abc', 'abc'": | 
 | 323 |     print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")' | 
 | 324 |  | 
| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 325 | assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def' | 
| Marc-André Lemburg | 8462573 | 2000-06-13 12:05:36 +0000 | [diff] [blame] | 326 | try: | 
 | 327 |     value = u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"}  | 
 | 328 | except KeyError: | 
 | 329 |     print '*** formatting failed for "%s"' % "u'abc, def'" | 
 | 330 | else: | 
 | 331 |     assert value == u'abc, def' | 
 | 332 |  | 
| Guido van Rossum | 9706486 | 2000-04-10 13:52:48 +0000 | [diff] [blame] | 333 | # formatting jobs delegated from the string implementation: | 
 | 334 | assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...' | 
 | 335 | assert '...%(foo)s...' % {'foo':"abc"} == '...abc...' | 
 | 336 | assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...' | 
 | 337 | assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...' | 
 | 338 | assert '...%(foo)s...' % {u'foo':u"abc",'def':123} ==  u'...abc...' | 
 | 339 | assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...' | 
 | 340 | assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...' | 
 | 341 | assert '...%s...' % u"abc" == u'...abc...' | 
| Guido van Rossum | a831cac | 2000-03-10 23:23:21 +0000 | [diff] [blame] | 342 | print 'done.' | 
 | 343 |  | 
| Guido van Rossum | d8855fd | 2000-03-24 22:14:19 +0000 | [diff] [blame] | 344 | # Test builtin codecs | 
 | 345 | print 'Testing builtin codecs...', | 
 | 346 |  | 
| Marc-André Lemburg | d6d06ad | 2000-07-07 17:48:52 +0000 | [diff] [blame] | 347 | # UTF-8 specific encoding tests: | 
 | 348 | assert u'\u20ac'.encode('utf-8') == \ | 
 | 349 |        ''.join((chr(0xe2), chr(0x82), chr(0xac))) | 
 | 350 | assert u'\ud800\udc02'.encode('utf-8') == \ | 
 | 351 |        ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))) | 
 | 352 | assert u'\ud84d\udc56'.encode('utf-8') == \ | 
 | 353 |        ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))) | 
 | 354 | # UTF-8 specific decoding tests | 
 | 355 | assert unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))), | 
 | 356 |                'utf-8') == u'\ud84d\udc56' | 
 | 357 | assert unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))), | 
 | 358 |                'utf-8') == u'\ud800\udc02' | 
 | 359 | assert unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))), | 
 | 360 |                'utf-8') == u'\u20ac' | 
 | 361 |  | 
 | 362 | # Other possible utf-8 test cases: | 
 | 363 | # * strict decoding testing for all of the | 
 | 364 | #   UTF8_ERROR cases in PyUnicode_DecodeUTF8 | 
 | 365 |  | 
 | 366 |  | 
 | 367 |  | 
| Guido van Rossum | d8855fd | 2000-03-24 22:14:19 +0000 | [diff] [blame] | 368 | assert unicode('hello','ascii') == u'hello' | 
 | 369 | assert unicode('hello','utf-8') == u'hello' | 
 | 370 | assert unicode('hello','utf8') == u'hello' | 
 | 371 | assert unicode('hello','latin-1') == u'hello' | 
 | 372 |  | 
class String:
    """Object whose str() conversion returns whatever is stored in ``x``."""

    # Class-level default; instances overwrite it by assigning to .x
    x = ''

    def __str__(self):
        return self.x
 | 377 |  | 
 | 378 | o = String() | 
 | 379 |  | 
 | 380 | o.x = 'abc' | 
 | 381 | assert unicode(o) == u'abc' | 
 | 382 | assert str(o) == 'abc' | 
 | 383 |  | 
 | 384 | o.x = u'abc' | 
 | 385 | assert unicode(o) == u'abc' | 
 | 386 | assert str(o) == 'abc' | 
 | 387 |  | 
# Encoding a non-ASCII character to ASCII must raise, both with the default
# error handling and with an explicit 'strict'.  Each variant gets its own
# try block so that the first call raising cannot hide the second one.
for extra in ((), ('strict',)):
    try:
        u'Andr\202 x'.encode('ascii', *extra)
    except ValueError:
        pass
    else:
        raise AssertionError("u'Andr\202'.encode('ascii') failed to raise an exception")
assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"

# Same for decoding a non-ASCII byte as ASCII.
for extra in ((), ('strict',)):
    try:
        unicode('Andr\202 x', 'ascii', *extra)
    except ValueError:
        pass
    else:
        raise AssertionError("unicode('Andr\202') failed to raise an exception")
assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
 | 407 |  | 
| Guido van Rossum | d8855fd | 2000-03-24 22:14:19 +0000 | [diff] [blame] | 408 | assert u'hello'.encode('ascii') == 'hello' | 
 | 409 | assert u'hello'.encode('utf-8') == 'hello' | 
 | 410 | assert u'hello'.encode('utf8') == 'hello' | 
 | 411 | assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000' | 
 | 412 | assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o' | 
 | 413 | assert u'hello'.encode('latin-1') == 'hello' | 
 | 414 |  | 
 | 415 | u = u''.join(map(unichr, range(1024))) | 
 | 416 | for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', | 
 | 417 |                  'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): | 
 | 418 |     assert unicode(u.encode(encoding),encoding) == u | 
 | 419 |  | 
 | 420 | u = u''.join(map(unichr, range(256))) | 
| Guido van Rossum | 9e896b3 | 2000-04-05 20:11:21 +0000 | [diff] [blame] | 421 | for encoding in ( | 
 | 422 |     'latin-1', | 
 | 423 |     ): | 
 | 424 |     try: | 
 | 425 |         assert unicode(u.encode(encoding),encoding) == u | 
 | 426 |     except AssertionError: | 
 | 427 |         print '*** codec "%s" failed round-trip' % encoding | 
 | 428 |     except ValueError,why: | 
 | 429 |         print '*** codec for "%s" failed: %s' % (encoding, why) | 
| Guido van Rossum | d8855fd | 2000-03-24 22:14:19 +0000 | [diff] [blame] | 430 |  | 
 | 431 | u = u''.join(map(unichr, range(128))) | 
| Guido van Rossum | 9e896b3 | 2000-04-05 20:11:21 +0000 | [diff] [blame] | 432 | for encoding in ( | 
 | 433 |     'ascii', | 
 | 434 |     ): | 
 | 435 |     try: | 
 | 436 |         assert unicode(u.encode(encoding),encoding) == u | 
 | 437 |     except AssertionError: | 
 | 438 |         print '*** codec "%s" failed round-trip' % encoding | 
 | 439 |     except ValueError,why: | 
 | 440 |         print '*** codec for "%s" failed: %s' % (encoding, why) | 
 | 441 |  | 
 | 442 | print 'done.' | 
 | 443 |  | 
 | 444 | print 'Testing standard mapping codecs...', | 
 | 445 |  | 
 | 446 | print '0-127...', | 
 | 447 | s = ''.join(map(chr, range(128))) | 
 | 448 | for encoding in ( | 
 | 449 |     'cp037', 'cp1026', | 
 | 450 |     'cp437', 'cp500', 'cp737', 'cp775', 'cp850', | 
 | 451 |     'cp852', 'cp855', 'cp860', 'cp861', 'cp862', | 
 | 452 |     'cp863', 'cp865', 'cp866',  | 
 | 453 |     'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', | 
 | 454 |     'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', | 
 | 455 |     'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1', | 
 | 456 |     'mac_cyrillic', 'mac_latin2', | 
 | 457 |  | 
 | 458 |     'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', | 
 | 459 |     'cp1256', 'cp1257', 'cp1258', | 
 | 460 |     'cp856', 'cp857', 'cp864', 'cp869', 'cp874', | 
 | 461 |  | 
 | 462 |     'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish', | 
 | 463 |     'cp1006', 'cp875', 'iso8859_8', | 
 | 464 |      | 
 | 465 |     ### These have undefined mappings: | 
 | 466 |     #'cp424', | 
 | 467 |      | 
 | 468 |     ): | 
 | 469 |     try: | 
 | 470 |         assert unicode(s,encoding).encode(encoding) == s | 
 | 471 |     except AssertionError: | 
 | 472 |         print '*** codec "%s" failed round-trip' % encoding | 
 | 473 |     except ValueError,why: | 
 | 474 |         print '*** codec for "%s" failed: %s' % (encoding, why) | 
 | 475 |  | 
 | 476 | print '128-255...', | 
 | 477 | s = ''.join(map(chr, range(128,256))) | 
 | 478 | for encoding in ( | 
 | 479 |     'cp037', 'cp1026', | 
 | 480 |     'cp437', 'cp500', 'cp737', 'cp775', 'cp850', | 
 | 481 |     'cp852', 'cp855', 'cp860', 'cp861', 'cp862', | 
 | 482 |     'cp863', 'cp865', 'cp866',  | 
 | 483 |     'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', | 
 | 484 |     'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', | 
 | 485 |     'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1', | 
 | 486 |     'mac_cyrillic', 'mac_latin2', | 
 | 487 |      | 
 | 488 |     ### These have undefined mappings: | 
 | 489 |     #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', | 
 | 490 |     #'cp1256', 'cp1257', 'cp1258', | 
 | 491 |     #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874', | 
 | 492 |     #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish', | 
 | 493 |      | 
 | 494 |     ### These fail the round-trip: | 
 | 495 |     #'cp1006', 'cp875', 'iso8859_8', | 
 | 496 |      | 
 | 497 |     ): | 
 | 498 |     try: | 
 | 499 |         assert unicode(s,encoding).encode(encoding) == s | 
 | 500 |     except AssertionError: | 
 | 501 |         print '*** codec "%s" failed round-trip' % encoding | 
 | 502 |     except ValueError,why: | 
 | 503 |         print '*** codec for "%s" failed: %s' % (encoding, why) | 
| Guido van Rossum | d8855fd | 2000-03-24 22:14:19 +0000 | [diff] [blame] | 504 |  | 
 | 505 | print 'done.' | 
| Fred Drake | e0243e2 | 2000-04-13 14:11:56 +0000 | [diff] [blame] | 506 |  | 
 | 507 | print 'Testing Unicode string concatenation...', | 
 | 508 | assert (u"abc" u"def") == u"abcdef" | 
 | 509 | assert ("abc" u"def") == u"abcdef" | 
 | 510 | assert (u"abc" "def") == u"abcdef" | 
 | 511 | assert (u"abc" u"def" "ghi") == u"abcdefghi" | 
 | 512 | assert ("abc" "def" u"ghi") == u"abcdefghi" | 
 | 513 | print 'done.' | 
| Marc-André Lemburg | a6f73d6 | 2000-06-28 16:41:23 +0000 | [diff] [blame] | 514 |  |