blob: e47eafd4c9817dafbffefdea71aa551d6ee2b2bf [file] [log] [blame]
Guido van Rossum24bdb042000-03-28 20:29:59 +00001""" Test script for the unicodedata module.
2
Marc-André Lemburg6a20ee72000-09-26 16:18:58 +00003 Written by Marc-Andre Lemburg (mal@lemburg.com).
Guido van Rossum24bdb042000-03-28 20:29:59 +00004
Marc-André Lemburg6a20ee72000-09-26 16:18:58 +00005 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
Guido van Rossum24bdb042000-03-28 20:29:59 +00006
7"""#"
Walter Dörwald37c47282003-02-26 14:49:41 +00008import unittest, test.test_support
Georg Brandlbffb0bc2006-04-30 08:57:35 +00009import hashlib
Guido van Rossum24bdb042000-03-28 20:29:59 +000010
Marc-André Lemburg67ceca72000-09-27 12:24:34 +000011encoding = 'utf-8'
12
Marc-André Lemburg6a20ee72000-09-26 16:18:58 +000013
14### Run tests
15
Walter Dörwald37c47282003-02-26 14:49:41 +000016class UnicodeMethodsTest(unittest.TestCase):
Marc-André Lemburg6a20ee72000-09-26 16:18:58 +000017
Walter Dörwald37c47282003-02-26 14:49:41 +000018 # update this, if the database changes
Martin v. Löwisd004fc82006-05-27 08:36:52 +000019 expectedchecksum = 'c198ed264497f108434b3f576d4107237221cc8a'
Guido van Rossum24bdb042000-03-28 20:29:59 +000020
Walter Dörwald37c47282003-02-26 14:49:41 +000021 def test_method_checksum(self):
Georg Brandlbffb0bc2006-04-30 08:57:35 +000022 h = hashlib.sha1()
Walter Dörwald37c47282003-02-26 14:49:41 +000023 for i in range(65536):
24 char = unichr(i)
25 data = [
26 # Predicates (single char)
27 u"01"[char.isalnum()],
28 u"01"[char.isalpha()],
29 u"01"[char.isdecimal()],
30 u"01"[char.isdigit()],
31 u"01"[char.islower()],
32 u"01"[char.isnumeric()],
33 u"01"[char.isspace()],
34 u"01"[char.istitle()],
35 u"01"[char.isupper()],
Guido van Rossum24bdb042000-03-28 20:29:59 +000036
Walter Dörwald37c47282003-02-26 14:49:41 +000037 # Predicates (multiple chars)
38 u"01"[(char + u'abc').isalnum()],
39 u"01"[(char + u'abc').isalpha()],
40 u"01"[(char + u'123').isdecimal()],
41 u"01"[(char + u'123').isdigit()],
42 u"01"[(char + u'abc').islower()],
43 u"01"[(char + u'123').isnumeric()],
44 u"01"[(char + u' \t').isspace()],
45 u"01"[(char + u'abc').istitle()],
46 u"01"[(char + u'ABC').isupper()],
Guido van Rossum24bdb042000-03-28 20:29:59 +000047
Walter Dörwald37c47282003-02-26 14:49:41 +000048 # Mappings (single char)
49 char.lower(),
50 char.upper(),
51 char.title(),
Guido van Rossum24bdb042000-03-28 20:29:59 +000052
Walter Dörwald37c47282003-02-26 14:49:41 +000053 # Mappings (multiple chars)
54 (char + u'abc').lower(),
55 (char + u'ABC').upper(),
56 (char + u'abc').title(),
57 (char + u'ABC').title(),
Guido van Rossum24bdb042000-03-28 20:29:59 +000058
Walter Dörwald37c47282003-02-26 14:49:41 +000059 ]
60 h.update(u''.join(data).encode(encoding))
61 result = h.hexdigest()
62 self.assertEqual(result, self.expectedchecksum)
Guido van Rossum24bdb042000-03-28 20:29:59 +000063
class UnicodeDatabaseTest(unittest.TestCase):
    """Base fixture that exposes the unicodedata module as ``self.db``."""

    def setUp(self):
        # The import is done per test instead of at module level: in case
        # unicodedata is not available, this will raise an ImportError,
        # but the other test cases will still be run.
        import unicodedata as database
        self.db = database

    def tearDown(self):
        # Drop the reference stored by setUp().
        delattr(self, 'db')
Guido van Rossum24bdb042000-03-28 20:29:59 +000074
class UnicodeFunctionsTest(UnicodeDatabaseTest):
    """Tests for the individual functions of the unicodedata module.

    ``self.db`` is the unicodedata module, provided by the base class
    fixture.  Test methods are described with comments rather than
    docstrings so that unittest keeps reporting them by method name.
    """

    # update this, if the database changes
    expectedchecksum = '4e389f97e9f88b8b7ab743121fd643089116f9f2'

    def test_function_checksum(self):
        # Hash the property values of the code points 0..0xFFFF and compare
        # the SHA-1 digest with the recorded one.
        h = hashlib.sha1()

        # xrange, as in the other whole-range loops of this file: no need
        # to build a 65536-element list just to iterate over it.
        for i in xrange(0x10000):
            char = unichr(i)
            data = [
                # Properties (-1 is the default for "no such value")
                str(self.db.digit(char, -1)),
                str(self.db.numeric(char, -1)),
                str(self.db.decimal(char, -1)),
                self.db.category(char),
                self.db.bidirectional(char),
                self.db.decomposition(char),
                str(self.db.mirrored(char)),
                str(self.db.combining(char)),
            ]
            h.update(''.join(data))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)

    def test_digit(self):
        # Values, default handling, and argument errors of digit().
        self.assertEqual(self.db.digit(u'A', None), None)
        self.assertEqual(self.db.digit(u'9'), 9)
        self.assertEqual(self.db.digit(u'\u215b', None), None)
        self.assertEqual(self.db.digit(u'\u2468'), 9)
        self.assertEqual(self.db.digit(u'\U00020000', None), None)

        # No argument / more than one character -> TypeError;
        # no digit value and no default -> ValueError.
        self.assertRaises(TypeError, self.db.digit)
        self.assertRaises(TypeError, self.db.digit, u'xx')
        self.assertRaises(ValueError, self.db.digit, u'x')

    def test_numeric(self):
        # Values, default handling, and argument errors of numeric().
        self.assertEqual(self.db.numeric(u'A', None), None)
        self.assertEqual(self.db.numeric(u'9'), 9)
        self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
        self.assertEqual(self.db.numeric(u'\u2468'), 9.0)
        self.assertEqual(self.db.numeric(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.numeric)
        self.assertRaises(TypeError, self.db.numeric, u'xx')
        self.assertRaises(ValueError, self.db.numeric, u'x')

    def test_decimal(self):
        # Values, default handling, and argument errors of decimal().
        self.assertEqual(self.db.decimal(u'A', None), None)
        self.assertEqual(self.db.decimal(u'9'), 9)
        self.assertEqual(self.db.decimal(u'\u215b', None), None)
        self.assertEqual(self.db.decimal(u'\u2468', None), None)
        self.assertEqual(self.db.decimal(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.decimal)
        self.assertRaises(TypeError, self.db.decimal, u'xx')
        self.assertRaises(ValueError, self.db.decimal, u'x')

    def test_category(self):
        # General category codes and argument errors of category().
        self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
        self.assertEqual(self.db.category(u'a'), 'Ll')
        self.assertEqual(self.db.category(u'A'), 'Lu')
        self.assertEqual(self.db.category(u'\U00020000'), 'Lo')

        self.assertRaises(TypeError, self.db.category)
        self.assertRaises(TypeError, self.db.category, u'xx')

    def test_bidirectional(self):
        # Bidirectional class codes and argument errors of bidirectional().
        self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
        self.assertEqual(self.db.bidirectional(u' '), 'WS')
        self.assertEqual(self.db.bidirectional(u'A'), 'L')
        self.assertEqual(self.db.bidirectional(u'\U00020000'), 'L')

        self.assertRaises(TypeError, self.db.bidirectional)
        self.assertRaises(TypeError, self.db.bidirectional, u'xx')

    def test_decomposition(self):
        # Decomposition mapping strings and argument errors.
        self.assertEqual(self.db.decomposition(u'\uFFFE'), '')
        self.assertEqual(self.db.decomposition(u'\u00bc'),
                         '<fraction> 0031 2044 0034')

        self.assertRaises(TypeError, self.db.decomposition)
        self.assertRaises(TypeError, self.db.decomposition, u'xx')

    def test_mirrored(self):
        # Mirrored flag values and argument errors of mirrored().
        self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
        self.assertEqual(self.db.mirrored(u'a'), 0)
        self.assertEqual(self.db.mirrored(u'\u2201'), 1)
        self.assertEqual(self.db.mirrored(u'\U00020000'), 0)

        self.assertRaises(TypeError, self.db.mirrored)
        self.assertRaises(TypeError, self.db.mirrored, u'xx')

    def test_combining(self):
        # Canonical combining class values and argument errors.
        self.assertEqual(self.db.combining(u'\uFFFE'), 0)
        self.assertEqual(self.db.combining(u'a'), 0)
        self.assertEqual(self.db.combining(u'\u20e1'), 230)
        self.assertEqual(self.db.combining(u'\U00020000'), 0)

        self.assertRaises(TypeError, self.db.combining)
        self.assertRaises(TypeError, self.db.combining, u'xx')

    def test_normalize(self):
        # Argument errors and the empty-string case of normalize().
        self.assertRaises(TypeError, self.db.normalize)
        self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
        self.assertEqual(self.db.normalize('NFKC', u''), u'')
        # The rest can be found in test_normalization.py
        # which requires an external file.

    def test_east_asian_width(self):
        # Argument errors first (a byte string, an empty string and a
        # two-character string are all rejected), then one sample per
        # expected width code.
        eaw = self.db.east_asian_width
        self.assertRaises(TypeError, eaw, 'a')
        self.assertRaises(TypeError, eaw, u'')
        self.assertRaises(TypeError, eaw, u'ra')
        self.assertEqual(eaw(u'\x1e'), 'N')
        self.assertEqual(eaw(u'\x20'), 'Na')
        self.assertEqual(eaw(u'\uC894'), 'W')
        self.assertEqual(eaw(u'\uFF66'), 'H')
        self.assertEqual(eaw(u'\uFF1F'), 'F')
        self.assertEqual(eaw(u'\u2010'), 'A')
        self.assertEqual(eaw(u'\U00020000'), 'W')
Walter Dörwald37c47282003-02-26 14:49:41 +0000196
class UnicodeMiscTest(UnicodeDatabaseTest):
    """Cross-function consistency checks and regression tests.

    ``self.db`` is the unicodedata module, provided by the base class
    fixture.  Test methods are described with comments rather than
    docstrings so that unittest keeps reporting them by method name.
    """

    def _check_consistent_with_numeric(self, lookup):
        # Shared body of the two consistency tests below.  `lookup` is
        # self.db.decimal or self.db.digit; it is called with a default of
        # -1, and wherever it reports a value for a code point in
        # 0..0xFFFF that value must equal self.db.numeric() for the same
        # character.
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            value = lookup(c, -1)
            if value != -1:
                self.assertEqual(value, self.db.numeric(c))
                count += 1
        # should have tested at least the ASCII digits
        self.assertTrue(count >= 10)

    def test_decimal_numeric_consistent(self):
        # Test that decimal and numeric are consistent,
        # i.e. if a character has a decimal value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.decimal)

    def test_digit_numeric_consistent(self):
        # Test that digit and numeric are consistent,
        # i.e. if a character has a digit value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.digit)

    def test_bug_1704793(self):
        # Regression test: lookup() by name must return the character for
        # a code point above U+FFFF.
        self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')
227
def test_main():
    """Run all test case classes of this module via test_support."""
    cases = (
        UnicodeMiscTest,
        UnicodeMethodsTest,
        UnicodeFunctionsTest,
    )
    test.test_support.run_unittest(*cases)


if __name__ == "__main__":
    test_main()