""" Test script for the Unicode implementation.

Written by Bill Tutt.

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
Marc-André Lemburg | 3661908 | 2001-01-17 19:11:13 +0000 | [diff] [blame] | 8 | from test_support import verify, verbose |
| 9 | |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 10 | print 'Testing General Unicode Character Name, and case insensitivity...', |
| 11 | |
| 12 | # General and case insensitivity test: |
| 13 | s = u"\N{LATIN CAPITAL LETTER T}" \ |
| 14 | u"\N{LATIN SMALL LETTER H}" \ |
| 15 | u"\N{LATIN SMALL LETTER E}" \ |
| 16 | u"\N{SPACE}" \ |
| 17 | u"\N{LATIN SMALL LETTER R}" \ |
| 18 | u"\N{LATIN CAPITAL LETTER E}" \ |
| 19 | u"\N{LATIN SMALL LETTER D}" \ |
| 20 | u"\N{SPACE}" \ |
| 21 | u"\N{LATIN SMALL LETTER f}" \ |
| 22 | u"\N{LATIN CAPITAL LeTtEr o}" \ |
| 23 | u"\N{LATIN SMaLl LETTER x}" \ |
| 24 | u"\N{SPACE}" \ |
| 25 | u"\N{LATIN SMALL LETTER A}" \ |
| 26 | u"\N{LATIN SMALL LETTER T}" \ |
| 27 | u"\N{LATIN SMALL LETTER E}" \ |
| 28 | u"\N{SPACE}" \ |
| 29 | u"\N{LATIN SMALL LETTER T}" \ |
| 30 | u"\N{LATIN SMALL LETTER H}" \ |
| 31 | u"\N{LATIN SMALL LETTER E}" \ |
| 32 | u"\N{SpAcE}" \ |
| 33 | u"\N{LATIN SMALL LETTER S}" \ |
| 34 | u"\N{LATIN SMALL LETTER H}" \ |
| 35 | u"\N{LATIN SMALL LETTER E}" \ |
| 36 | u"\N{LATIN SMALL LETTER E}" \ |
| 37 | u"\N{LATIN SMALL LETTER P}" \ |
| 38 | u"\N{FULL STOP}" |
Marc-André Lemburg | 3661908 | 2001-01-17 19:11:13 +0000 | [diff] [blame] | 39 | verify(s == u"The rEd fOx ate the sheep.", s) |
Fredrik Lundh | 2acb54a | 2001-01-19 11:13:46 +0000 | [diff] [blame^] | 40 | print "done." |
Fredrik Lundh | ee865c6 | 2001-01-19 11:00:42 +0000 | [diff] [blame] | 41 | |
| 42 | import ucnhash |
| 43 | |
Fredrik Lundh | 2acb54a | 2001-01-19 11:13:46 +0000 | [diff] [blame^] | 44 | print "Testing name to code mapping....", |
Fredrik Lundh | ee865c6 | 2001-01-19 11:00:42 +0000 | [diff] [blame] | 45 | for char in "SPAM": |
| 46 | name = "LATIN SMALL LETTER %s" % char |
| 47 | code = ucnhash.getcode(name) |
| 48 | verify(ucnhash.getname(code) == name) |
Fredrik Lundh | 2acb54a | 2001-01-19 11:13:46 +0000 | [diff] [blame^] | 49 | print "done." |
Fredrik Lundh | ee865c6 | 2001-01-19 11:00:42 +0000 | [diff] [blame] | 50 | |
Fredrik Lundh | 2acb54a | 2001-01-19 11:13:46 +0000 | [diff] [blame^] | 51 | print "Testing code to name mapping for all characters....", |
| 52 | count = 0 |
Fredrik Lundh | ee865c6 | 2001-01-19 11:00:42 +0000 | [diff] [blame] | 53 | for code in range(65536): |
| 54 | try: |
| 55 | name = ucnhash.getname(code) |
| 56 | verify(ucnhash.getcode(name) == code) |
Fredrik Lundh | 2acb54a | 2001-01-19 11:13:46 +0000 | [diff] [blame^] | 57 | count += 1 |
Fredrik Lundh | ee865c6 | 2001-01-19 11:00:42 +0000 | [diff] [blame] | 58 | except ValueError: |
| 59 | pass |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 60 | print "done." |
| 61 | |
Fredrik Lundh | 2acb54a | 2001-01-19 11:13:46 +0000 | [diff] [blame^] | 62 | print "Found", count, "characters in the unicode name database" |
| 63 | |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 64 | # misc. symbol testing |
| 65 | print "Testing misc. symbols for unicode character name expansion....", |
Marc-André Lemburg | 3661908 | 2001-01-17 19:11:13 +0000 | [diff] [blame] | 66 | verify(u"\N{PILCROW SIGN}" == u"\u00b6") |
| 67 | verify(u"\N{REPLACEMENT CHARACTER}" == u"\uFFFD") |
| 68 | verify(u"\N{HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK}" == u"\uFF9F") |
| 69 | verify(u"\N{FULLWIDTH LATIN SMALL LETTER A}" == u"\uFF41") |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 70 | print "done." |
| 71 | |
| 72 | |
| 73 | # strict error testing: |
| 74 | print "Testing unicode character name expansion strict error handling....", |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 75 | try: |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 76 | unicode("\N{blah}", 'unicode-escape', 'strict') |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 77 | except UnicodeError: |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 78 | pass |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 79 | else: |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 80 | raise AssertionError, "failed to raise an exception when given a bogus character name" |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 81 | |
| 82 | try: |
Fredrik Lundh | 0fdb90c | 2001-01-19 09:45:02 +0000 | [diff] [blame] | 83 | unicode("\N{" + "x" * 100000 + "}", 'unicode-escape', 'strict') |
| 84 | except UnicodeError: |
| 85 | pass |
| 86 | else: |
| 87 | raise AssertionError, "failed to raise an exception when given a very " \ |
| 88 | "long bogus character name" |
| 89 | |
| 90 | try: |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 91 | unicode("\N{SPACE", 'unicode-escape', 'strict') |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 92 | except UnicodeError: |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 93 | pass |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 94 | else: |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 95 | raise AssertionError, "failed to raise an exception for a missing closing brace." |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 96 | |
| 97 | try: |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 98 | unicode("\NSPACE", 'unicode-escape', 'strict') |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 99 | except UnicodeError: |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 100 | pass |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 101 | else: |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 102 | raise AssertionError, "failed to raise an exception for a missing opening brace." |
Marc-André Lemburg | 6cdec2e | 2000-06-30 09:45:20 +0000 | [diff] [blame] | 103 | print "done." |