# blob: a51dc9a72e3bb3bbd941a67d33ac61e600453d3c [file] [log] [blame]
""" Test script for the Unicode implementation.

Written by Bill Tutt.

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
Marc-André Lemburg36619082001-01-17 19:11:13 +00008from test_support import verify, verbose
9
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000010print 'Testing General Unicode Character Name, and case insensitivity...',
11
12# General and case insensitivity test:
13s = u"\N{LATIN CAPITAL LETTER T}" \
14 u"\N{LATIN SMALL LETTER H}" \
15 u"\N{LATIN SMALL LETTER E}" \
16 u"\N{SPACE}" \
17 u"\N{LATIN SMALL LETTER R}" \
18 u"\N{LATIN CAPITAL LETTER E}" \
19 u"\N{LATIN SMALL LETTER D}" \
20 u"\N{SPACE}" \
21 u"\N{LATIN SMALL LETTER f}" \
22 u"\N{LATIN CAPITAL LeTtEr o}" \
23 u"\N{LATIN SMaLl LETTER x}" \
24 u"\N{SPACE}" \
25 u"\N{LATIN SMALL LETTER A}" \
26 u"\N{LATIN SMALL LETTER T}" \
27 u"\N{LATIN SMALL LETTER E}" \
28 u"\N{SPACE}" \
29 u"\N{LATIN SMALL LETTER T}" \
30 u"\N{LATIN SMALL LETTER H}" \
31 u"\N{LATIN SMALL LETTER E}" \
32 u"\N{SpAcE}" \
33 u"\N{LATIN SMALL LETTER S}" \
34 u"\N{LATIN SMALL LETTER H}" \
35 u"\N{LATIN SMALL LETTER E}" \
36 u"\N{LATIN SMALL LETTER E}" \
37 u"\N{LATIN SMALL LETTER P}" \
38 u"\N{FULL STOP}"
Marc-André Lemburg36619082001-01-17 19:11:13 +000039verify(s == u"The rEd fOx ate the sheep.", s)
Fredrik Lundhee865c62001-01-19 11:00:42 +000040
41import ucnhash
42
43# minimal sanity check
44for char in "SPAM":
45 name = "LATIN SMALL LETTER %s" % char
46 code = ucnhash.getcode(name)
47 verify(ucnhash.getname(code) == name)
48
49# loop over all characters in the database
50for code in range(65536):
51 try:
52 name = ucnhash.getname(code)
53 verify(ucnhash.getcode(name) == code)
54 except ValueError:
55 pass
56
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000057print "done."
58
59# misc. symbol testing
60print "Testing misc. symbols for unicode character name expansion....",
Marc-André Lemburg36619082001-01-17 19:11:13 +000061verify(u"\N{PILCROW SIGN}" == u"\u00b6")
62verify(u"\N{REPLACEMENT CHARACTER}" == u"\uFFFD")
63verify(u"\N{HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK}" == u"\uFF9F")
64verify(u"\N{FULLWIDTH LATIN SMALL LETTER A}" == u"\uFF41")
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000065print "done."
66
67
68# strict error testing:
69print "Testing unicode character name expansion strict error handling....",
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000070try:
Fred Drake004d5e62000-10-23 17:22:08 +000071 unicode("\N{blah}", 'unicode-escape', 'strict')
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000072except UnicodeError:
Fred Drake004d5e62000-10-23 17:22:08 +000073 pass
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000074else:
Fred Drake004d5e62000-10-23 17:22:08 +000075 raise AssertionError, "failed to raise an exception when given a bogus character name"
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000076
77try:
Fredrik Lundh0fdb90c2001-01-19 09:45:02 +000078 unicode("\N{" + "x" * 100000 + "}", 'unicode-escape', 'strict')
79except UnicodeError:
80 pass
81else:
82 raise AssertionError, "failed to raise an exception when given a very " \
83 "long bogus character name"
84
85try:
Fred Drake004d5e62000-10-23 17:22:08 +000086 unicode("\N{SPACE", 'unicode-escape', 'strict')
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000087except UnicodeError:
Fred Drake004d5e62000-10-23 17:22:08 +000088 pass
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000089else:
Fred Drake004d5e62000-10-23 17:22:08 +000090 raise AssertionError, "failed to raise an exception for a missing closing brace."
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000091
92try:
Fred Drake004d5e62000-10-23 17:22:08 +000093 unicode("\NSPACE", 'unicode-escape', 'strict')
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000094except UnicodeError:
Fred Drake004d5e62000-10-23 17:22:08 +000095 pass
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000096else:
Fred Drake004d5e62000-10-23 17:22:08 +000097 raise AssertionError, "failed to raise an exception for a missing opening brace."
Marc-André Lemburg6cdec2e2000-06-30 09:45:20 +000098print "done."