blob: 0797f2c79c5519ea86e65786e41300f86eb5620b [file] [log] [blame]
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +00001""" Test script for the Unicode implementation.
2
3Written by Bill Tutt.
4
5(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
6
7"""#"
Marc-André Lemburg36619082001-01-17 19:11:13 +00008from test_support import verify, verbose
9
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000010print 'Testing General Unicode Character Name, and case insensitivity...',
11
12# General and case insensitivity test:
Fredrik Lundhf6056062001-01-20 11:15:25 +000013try:
14 # put all \N escapes inside exec'd raw strings, to make sure this
15 # script runs even if the compiler chokes on \N escapes
16 exec r"""
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000017s = u"\N{LATIN CAPITAL LETTER T}" \
18 u"\N{LATIN SMALL LETTER H}" \
19 u"\N{LATIN SMALL LETTER E}" \
20 u"\N{SPACE}" \
21 u"\N{LATIN SMALL LETTER R}" \
22 u"\N{LATIN CAPITAL LETTER E}" \
23 u"\N{LATIN SMALL LETTER D}" \
24 u"\N{SPACE}" \
25 u"\N{LATIN SMALL LETTER f}" \
26 u"\N{LATIN CAPITAL LeTtEr o}" \
27 u"\N{LATIN SMaLl LETTER x}" \
28 u"\N{SPACE}" \
29 u"\N{LATIN SMALL LETTER A}" \
30 u"\N{LATIN SMALL LETTER T}" \
31 u"\N{LATIN SMALL LETTER E}" \
32 u"\N{SPACE}" \
33 u"\N{LATIN SMALL LETTER T}" \
34 u"\N{LATIN SMALL LETTER H}" \
35 u"\N{LATIN SMALL LETTER E}" \
36 u"\N{SpAcE}" \
37 u"\N{LATIN SMALL LETTER S}" \
38 u"\N{LATIN SMALL LETTER H}" \
39 u"\N{LATIN SMALL LETTER E}" \
40 u"\N{LATIN SMALL LETTER E}" \
41 u"\N{LATIN SMALL LETTER P}" \
42 u"\N{FULL STOP}"
Marc-André Lemburg36619082001-01-17 19:11:13 +000043verify(s == u"The rEd fOx ate the sheep.", s)
Fredrik Lundhf6056062001-01-20 11:15:25 +000044"""
45except UnicodeError, v:
46 print v
Fredrik Lundh2acb54a2001-01-19 11:13:46 +000047print "done."
Fredrik Lundhee865c62001-01-19 11:00:42 +000048
49import ucnhash
50
Fredrik Lundh2acb54a2001-01-19 11:13:46 +000051print "Testing name to code mapping....",
Fredrik Lundhee865c62001-01-19 11:00:42 +000052for char in "SPAM":
53 name = "LATIN SMALL LETTER %s" % char
54 code = ucnhash.getcode(name)
55 verify(ucnhash.getname(code) == name)
Fredrik Lundh2acb54a2001-01-19 11:13:46 +000056print "done."
Fredrik Lundhee865c62001-01-19 11:00:42 +000057
Fredrik Lundh2acb54a2001-01-19 11:13:46 +000058print "Testing code to name mapping for all characters....",
59count = 0
Fredrik Lundhee865c62001-01-19 11:00:42 +000060for code in range(65536):
61 try:
62 name = ucnhash.getname(code)
63 verify(ucnhash.getcode(name) == code)
Fredrik Lundh2acb54a2001-01-19 11:13:46 +000064 count += 1
Fredrik Lundhee865c62001-01-19 11:00:42 +000065 except ValueError:
66 pass
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000067print "done."
68
Fredrik Lundh2acb54a2001-01-19 11:13:46 +000069print "Found", count, "characters in the unicode name database"
70
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000071# misc. symbol testing
72print "Testing misc. symbols for unicode character name expansion....",
Fredrik Lundhf6056062001-01-20 11:15:25 +000073exec r"""
Marc-André Lemburg36619082001-01-17 19:11:13 +000074verify(u"\N{PILCROW SIGN}" == u"\u00b6")
75verify(u"\N{REPLACEMENT CHARACTER}" == u"\uFFFD")
76verify(u"\N{HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK}" == u"\uFF9F")
77verify(u"\N{FULLWIDTH LATIN SMALL LETTER A}" == u"\uFF41")
Fredrik Lundhf6056062001-01-20 11:15:25 +000078"""
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000079print "done."
80
81
82# strict error testing:
83print "Testing unicode character name expansion strict error handling....",
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000084try:
Fred Drake004d5e62000-10-23 17:22:08 +000085 unicode("\N{blah}", 'unicode-escape', 'strict')
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000086except UnicodeError:
Fred Drake004d5e62000-10-23 17:22:08 +000087 pass
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000088else:
Fred Drake004d5e62000-10-23 17:22:08 +000089 raise AssertionError, "failed to raise an exception when given a bogus character name"
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000090
91try:
Fredrik Lundh0fdb90c2001-01-19 09:45:02 +000092 unicode("\N{" + "x" * 100000 + "}", 'unicode-escape', 'strict')
93except UnicodeError:
94 pass
95else:
96 raise AssertionError, "failed to raise an exception when given a very " \
97 "long bogus character name"
98
99try:
Fred Drake004d5e62000-10-23 17:22:08 +0000100 unicode("\N{SPACE", 'unicode-escape', 'strict')
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +0000101except UnicodeError:
Fred Drake004d5e62000-10-23 17:22:08 +0000102 pass
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +0000103else:
Fred Drake004d5e62000-10-23 17:22:08 +0000104 raise AssertionError, "failed to raise an exception for a missing closing brace."
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +0000105
106try:
Fred Drake004d5e62000-10-23 17:22:08 +0000107 unicode("\NSPACE", 'unicode-escape', 'strict')
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +0000108except UnicodeError:
Fred Drake004d5e62000-10-23 17:22:08 +0000109 pass
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +0000110else:
Fred Drake004d5e62000-10-23 17:22:08 +0000111 raise AssertionError, "failed to raise an exception for a missing opening brace."
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +0000112print "done."