""" Test script for the unicodedata module.

    Written by Marc-Andre Lemburg (mal@lemburg.com).

    (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
import sha
import test.test_support
import unittest

# Codec used to encode the joined unicode test data before it is hashed.
encoding = 'utf-8'


### Run tests

Walter Dörwald37c47282003-02-26 14:49:41 +000016class UnicodeMethodsTest(unittest.TestCase):
Marc-André Lemburg6a20ee72000-09-26 16:18:58 +000017
Walter Dörwald37c47282003-02-26 14:49:41 +000018 # update this, if the database changes
19 expectedchecksum = 'a37276dc2c158bef6dfd908ad34525c97180fad9'
Guido van Rossum24bdb042000-03-28 20:29:59 +000020
Walter Dörwald37c47282003-02-26 14:49:41 +000021 def test_method_checksum(self):
22 h = sha.sha()
23 for i in range(65536):
24 char = unichr(i)
25 data = [
26 # Predicates (single char)
27 u"01"[char.isalnum()],
28 u"01"[char.isalpha()],
29 u"01"[char.isdecimal()],
30 u"01"[char.isdigit()],
31 u"01"[char.islower()],
32 u"01"[char.isnumeric()],
33 u"01"[char.isspace()],
34 u"01"[char.istitle()],
35 u"01"[char.isupper()],
Guido van Rossum24bdb042000-03-28 20:29:59 +000036
Walter Dörwald37c47282003-02-26 14:49:41 +000037 # Predicates (multiple chars)
38 u"01"[(char + u'abc').isalnum()],
39 u"01"[(char + u'abc').isalpha()],
40 u"01"[(char + u'123').isdecimal()],
41 u"01"[(char + u'123').isdigit()],
42 u"01"[(char + u'abc').islower()],
43 u"01"[(char + u'123').isnumeric()],
44 u"01"[(char + u' \t').isspace()],
45 u"01"[(char + u'abc').istitle()],
46 u"01"[(char + u'ABC').isupper()],
Guido van Rossum24bdb042000-03-28 20:29:59 +000047
Walter Dörwald37c47282003-02-26 14:49:41 +000048 # Mappings (single char)
49 char.lower(),
50 char.upper(),
51 char.title(),
Guido van Rossum24bdb042000-03-28 20:29:59 +000052
Walter Dörwald37c47282003-02-26 14:49:41 +000053 # Mappings (multiple chars)
54 (char + u'abc').lower(),
55 (char + u'ABC').upper(),
56 (char + u'abc').title(),
57 (char + u'ABC').title(),
Guido van Rossum24bdb042000-03-28 20:29:59 +000058
Walter Dörwald37c47282003-02-26 14:49:41 +000059 ]
60 h.update(u''.join(data).encode(encoding))
61 result = h.hexdigest()
62 self.assertEqual(result, self.expectedchecksum)
Guido van Rossum24bdb042000-03-28 20:29:59 +000063
class UnicodeDatabaseTest(unittest.TestCase):
    """Base class for tests that need the unicodedata module.

    setUp() imports unicodedata and stores it as self.db; tearDown()
    removes the attribute again.
    """

    def setUp(self):
        # In case unicodedata is not available, this will raise an ImportError,
        # but the other test cases will still be run
        import unicodedata
        self.db = unicodedata

    def tearDown(self):
        del self.db

class UnicodeFunctionsTest(UnicodeDatabaseTest):
    """Tests for the per-character lookup functions reached through self.db."""

    # update this, if the database changes
    expectedchecksum = 'cfe20a967a450ebc82ca68c3e4eed344164e11af'

    def test_function_checksum(self):
        # Feed the properties of every code point below 0x10000 into one
        # running hash and compare the final hex digest with
        # expectedchecksum.
        h = sha.sha()

        for i in xrange(0x10000):
            char = unichr(i)
            data = [
                # Properties
                # (-1 is passed as the default for characters without a value)
                str(self.db.digit(char, -1)),
                str(self.db.numeric(char, -1)),
                str(self.db.decimal(char, -1)),
                self.db.category(char),
                self.db.bidirectional(char),
                self.db.decomposition(char),
                str(self.db.mirrored(char)),
                str(self.db.combining(char)),
            ]
            h.update(''.join(data))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)

    def test_digit(self):
        self.assertEqual(self.db.digit(u'A', None), None)
        self.assertEqual(self.db.digit(u'9'), 9)
        # u'\u215b' has a numeric value (see test_numeric) but no digit value
        self.assertEqual(self.db.digit(u'\u215b', None), None)
        self.assertEqual(self.db.digit(u'\u2468'), 9)

        # no argument, a two-character string, and a character without a
        # digit value when no default is supplied
        self.assertRaises(TypeError, self.db.digit)
        self.assertRaises(TypeError, self.db.digit, u'xx')
        self.assertRaises(ValueError, self.db.digit, u'x')

    def test_numeric(self):
        self.assertEqual(self.db.numeric(u'A', None), None)
        self.assertEqual(self.db.numeric(u'9'), 9)
        self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
        self.assertEqual(self.db.numeric(u'\u2468'), 9.0)

        self.assertRaises(TypeError, self.db.numeric)
        self.assertRaises(TypeError, self.db.numeric, u'xx')
        self.assertRaises(ValueError, self.db.numeric, u'x')

    def test_decimal(self):
        self.assertEqual(self.db.decimal(u'A', None), None)
        self.assertEqual(self.db.decimal(u'9'), 9)
        # these two have numeric values (see test_numeric) but no decimal value
        self.assertEqual(self.db.decimal(u'\u215b', None), None)
        self.assertEqual(self.db.decimal(u'\u2468', None), None)

        self.assertRaises(TypeError, self.db.decimal)
        self.assertRaises(TypeError, self.db.decimal, u'xx')
        self.assertRaises(ValueError, self.db.decimal, u'x')

    def test_category(self):
        self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
        self.assertEqual(self.db.category(u'a'), 'Ll')
        self.assertEqual(self.db.category(u'A'), 'Lu')

        self.assertRaises(TypeError, self.db.category)
        self.assertRaises(TypeError, self.db.category, u'xx')

    def test_bidirectional(self):
        self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
        self.assertEqual(self.db.bidirectional(u' '), 'WS')
        self.assertEqual(self.db.bidirectional(u'A'), 'L')

        self.assertRaises(TypeError, self.db.bidirectional)
        self.assertRaises(TypeError, self.db.bidirectional, u'xx')

    def test_decomposition(self):
        self.assertEqual(self.db.decomposition(u'\uFFFE'), '')
        self.assertEqual(self.db.decomposition(u'\u00bc'), '<fraction> 0031 2044 0034')

        self.assertRaises(TypeError, self.db.decomposition)
        self.assertRaises(TypeError, self.db.decomposition, u'xx')

    def test_mirrored(self):
        self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
        self.assertEqual(self.db.mirrored(u'a'), 0)
        self.assertEqual(self.db.mirrored(u'\u2201'), 1)

        self.assertRaises(TypeError, self.db.mirrored)
        self.assertRaises(TypeError, self.db.mirrored, u'xx')

    def test_combining(self):
        self.assertEqual(self.db.combining(u'\uFFFE'), 0)
        self.assertEqual(self.db.combining(u'a'), 0)
        self.assertEqual(self.db.combining(u'\u20e1'), 230)

        self.assertRaises(TypeError, self.db.combining)
        self.assertRaises(TypeError, self.db.combining, u'xx')

    def test_normalize(self):
        self.assertRaises(TypeError, self.db.normalize)
        self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
        self.assertEqual(self.db.normalize('NFKC', u''), u'')
        # The rest can be found in test_normalization.py
        # which requires an external file.

    def test_east_asian_width(self):
        eaw = self.db.east_asian_width
        # a byte string, an empty string and a two-character string
        # are all rejected
        self.assertRaises(TypeError, eaw, 'a')
        self.assertRaises(TypeError, eaw, u'')
        self.assertRaises(TypeError, eaw, u'ra')
        self.assertEqual(eaw(u'\x1e'), 'N')
        self.assertEqual(eaw(u'\x20'), 'Na')
        self.assertEqual(eaw(u'\uC894'), 'W')
        self.assertEqual(eaw(u'\uFF66'), 'H')
        self.assertEqual(eaw(u'\uFF1F'), 'F')
        self.assertEqual(eaw(u'\u2010'), 'A')

class UnicodeMiscTest(UnicodeDatabaseTest):
    """Consistency checks between decimal(), digit() and numeric()."""

    def _check_consistent_with_numeric(self, lookup):
        # Shared body of the two tests below.  `lookup` is called as
        # lookup(char, -1) for every code point below 0x10000; wherever it
        # returns something other than the -1 default, numeric(char) must
        # be equal to that value.
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            value = lookup(c, -1)
            if value != -1:
                self.assertEqual(value, self.db.numeric(c))
                count += 1
        self.assert_(count >= 10) # should have tested at least the ASCII digits

    def test_decimal_numeric_consistent(self):
        # Test that decimal and numeric are consistent,
        # i.e. if a character has a decimal value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.decimal)

    def test_digit_numeric_consistent(self):
        # Test that digit and numeric are consistent,
        # i.e. if a character has a digit value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.digit)

def test_main():
    """Run the three concrete test case classes of this module."""
    test.test_support.run_unittest(
        UnicodeMiscTest,
        UnicodeMethodsTest,
        UnicodeFunctionsTest
    )

# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    test_main()