blob: 0dc575615e4d6ac20139863ad8687ffdf0c97ff9 [file] [log] [blame]
Guido van Rossum24bdb042000-03-28 20:29:59 +00001""" Test script for the unicodedata module.
2
Marc-André Lemburg6a20ee72000-09-26 16:18:58 +00003 Written by Marc-Andre Lemburg (mal@lemburg.com).
Guido van Rossum24bdb042000-03-28 20:29:59 +00004
Marc-André Lemburg6a20ee72000-09-26 16:18:58 +00005 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
Guido van Rossum24bdb042000-03-28 20:29:59 +00006
7"""#"
Marc-André Lemburg6a20ee72000-09-26 16:18:58 +00008import sha
Guido van Rossum24bdb042000-03-28 20:29:59 +00009
Marc-André Lemburg67ceca72000-09-27 12:24:34 +000010encoding = 'utf-8'
11
def test_methods():
    """Return a SHA hex digest over the Unicode string-method results.

    For each code point in range(65536) the results of the character
    predicates (rendered as u'1' / u'0') and of the case mappings are
    concatenated, encoded with the module-level `encoding`, and fed
    into the running hash.
    """
    digest = sha.sha()
    for code in range(65536):
        ch = unichr(code)

        # Multi-character inputs: the character followed by a fixed tail.
        letters = ch + u'abc'
        capitals = ch + u'ABC'
        digits = ch + u'123'
        blanks = ch + u' \t'

        flags = [
            # Predicates (single char)
            ch.isalnum(),
            ch.isalpha(),
            ch.isdecimal(),
            ch.isdigit(),
            ch.islower(),
            ch.isnumeric(),
            ch.isspace(),
            ch.istitle(),
            ch.isupper(),

            # Predicates (multiple chars)
            letters.isalnum(),
            letters.isalpha(),
            digits.isdecimal(),
            digits.isdigit(),
            letters.islower(),
            digits.isnumeric(),
            blanks.isspace(),
            letters.istitle(),
            capitals.isupper(),
        ]

        # Render every predicate result as a one-character marker.
        pieces = []
        for flag in flags:
            if flag:
                pieces.append(u'1')
            else:
                pieces.append(u'0')

        pieces.extend([
            # Mappings (single char)
            ch.lower(),
            ch.upper(),
            ch.title(),

            # Mappings (multiple chars)
            letters.lower(),
            capitals.upper(),
            letters.title(),
            capitals.title(),
        ])

        digest.update(u''.join(pieces).encode(encoding))
    return digest.hexdigest()
55
def test_unicodedata():
    """Return a SHA hex digest over the unicodedata property lookups.

    For each code point in range(65536) the digit, numeric, decimal,
    category, bidirectional, decomposition, mirrored and combining
    values are turned into text, joined in that order and fed into
    the running hash.
    """
    digest = sha.sha()
    for code in range(65536):
        ch = unichr(code)

        # -1 is passed as the default argument of the numeric lookups.
        digit = unicodedata.digit(ch, -1)
        numeric = unicodedata.numeric(ch, -1)
        decimal = unicodedata.decimal(ch, -1)

        category = unicodedata.category(ch)
        bidirectional = unicodedata.bidirectional(ch)
        decomposition = unicodedata.decomposition(ch)

        mirrored = unicodedata.mirrored(ch)
        combining = unicodedata.combining(ch)

        fields = [
            str(digit),
            str(numeric),
            str(decimal),
            category,
            bidirectional,
            decomposition,
            str(mirrored),
            str(combining),
        ]
        digest.update(''.join(fields))
    return digest.hexdigest()
74
75### Run tests
76
77print 'Testing Unicode Database...'
78print 'Methods:',
79print test_methods()
80
81# In case unicodedata is not available, this will raise an ImportError,
82# but still test the above cases...
Guido van Rossum24bdb042000-03-28 20:29:59 +000083import unicodedata
Marc-André Lemburg6a20ee72000-09-26 16:18:58 +000084print 'Functions:',
85print test_unicodedata()
Guido van Rossum24bdb042000-03-28 20:29:59 +000086
Marc-André Lemburg6a20ee72000-09-26 16:18:58 +000087# Some additional checks of the API:
88print 'API:',
Guido van Rossum24bdb042000-03-28 20:29:59 +000089
90assert unicodedata.digit(u'A',None) is None
91assert unicodedata.digit(u'9') == 9
92assert unicodedata.digit(u'\u215b',None) is None
93assert unicodedata.digit(u'\u2468') == 9
94
95assert unicodedata.numeric(u'A',None) is None
96assert unicodedata.numeric(u'9') == 9
97assert unicodedata.numeric(u'\u215b') == 0.125
98assert unicodedata.numeric(u'\u2468') == 9.0
99
100assert unicodedata.decimal(u'A',None) is None
101assert unicodedata.decimal(u'9') == 9
102assert unicodedata.decimal(u'\u215b',None) is None
103assert unicodedata.decimal(u'\u2468',None) is None
104
105assert unicodedata.category(u'\uFFFE') == 'Cn'
106assert unicodedata.category(u'a') == 'Ll'
107assert unicodedata.category(u'A') == 'Lu'
108
109assert unicodedata.bidirectional(u'\uFFFE') == ''
110assert unicodedata.bidirectional(u' ') == 'WS'
111assert unicodedata.bidirectional(u'A') == 'L'
112
113assert unicodedata.decomposition(u'\uFFFE') == ''
114assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
115
116assert unicodedata.mirrored(u'\uFFFE') == 0
117assert unicodedata.mirrored(u'a') == 0
118assert unicodedata.mirrored(u'\u2201') == 1
119
120assert unicodedata.combining(u'\uFFFE') == 0
121assert unicodedata.combining(u'a') == 0
122assert unicodedata.combining(u'\u20e1') == 230
123
Marc-André Lemburg6a20ee72000-09-26 16:18:58 +0000124print 'ok'