blob: c7ecacec38778807af99a9a58e985f6f6bc1b190 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
"""
    Convert the X11 locale.alias file into a mapping dictionary suitable
    for locale.py.

    Written by Marc-Andre Lemburg <mal@genix.com>, 2004-12-10.

"""
9import locale
Serhiy Storchaka8c4f57d2013-12-27 00:56:53 +020010import sys
Serhiy Storchakaea4f0572014-10-01 23:42:30 +030011_locale = locale
Marc-André Lemburgbb4f1bd2004-12-10 21:58:14 +000012
Serhiy Storchaka8276d872014-10-02 10:38:12 +030013# Location of the X11 alias file.
Antoine Pitrou0c70d2d2010-04-11 22:35:34 +000014LOCALE_ALIAS = '/usr/share/X11/locale/locale.alias'
Serhiy Storchaka8276d872014-10-02 10:38:12 +030015# Location of the glibc SUPPORTED locales file.
16SUPPORTED = '/usr/share/i18n/SUPPORTED'
Marc-André Lemburgbb4f1bd2004-12-10 21:58:14 +000017
def parse(filename):
    """Parse an X11 ``locale.alias`` file into a locale-name mapping.

    Every non-blank line that does not start with '#' is expected to hold
    two whitespace-separated fields: a locale name (optionally terminated
    by ':') and the alias it maps to.  Lines that do not consist of
    exactly two fields are skipped.

    The locale name is lower-cased and, when it contains an encoding part
    ("lang.encoding"), '-' and '_' are removed from the encoding.  Aliases
    written as "lang@modifier.encoding" are rewritten to
    "lang.encoding@modifier".  One-letter locale names other than 'c' are
    ignored.

    The file is read as latin1.  Returns a dict mapping the normalized
    locale names to their aliases; when a name occurs more than once the
    last occurrence wins.
    """
    data = {}
    with open(filename, encoding='latin1') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            words = line.split()
            if len(words) != 2:
                # Not a "name alias" pair; skip it instead of failing on
                # the tuple unpacking below.
                continue
            name, alias = words
            # Fix non-standard locale names, e.g. ks_IN@devanagari.UTF-8
            if '@' in alias:
                alias_lang, _, alias_mod = alias.partition('@')
                if '.' in alias_mod:
                    alias_mod, _, alias_enc = alias_mod.partition('.')
                    alias = alias_lang + '.' + alias_enc + '@' + alias_mod
            # Strip ':'
            if name.endswith(':'):
                name = name[:-1]
            # Lower-case locale
            name = name.lower()
            # Ignore one letter locale mappings (except for 'c')
            if len(name) == 1 and name != 'c':
                continue
            # Normalize encoding, if given
            if '.' in name:
                lang, encoding = name.split('.')[:2]
                encoding = encoding.replace('-', '')
                encoding = encoding.replace('_', '')
                name = lang + '.' + encoding
            data[name] = alias
    return data
52
def parse_glibc_supported(filename):
    """Parse a glibc ``SUPPORTED`` locales file into a locale-name mapping.

    Blank lines and lines starting with '#' are ignored.  On the remaining
    lines '/' is treated as a field separator and a trailing backslash is
    dropped; only lines that then consist of exactly two fields, a locale
    name and an encoding, are used.

    The key is the lower-cased locale name, with '-' and '_' removed from
    its encoding part when it has one.  The value is the locale name with
    its encoding replaced by the second field (via
    ``locale._replace_encoding``); an '@modifier' is moved after the
    encoding, except that '@euro' is dropped when the encoding is
    'ISO-8859-15'.

    The file is read as latin1.  Returns the resulting dict; when a key
    occurs more than once the last occurrence wins.
    """
    data = {}
    with open(filename, encoding='latin1') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            line = line.replace('/', ' ').strip()
            line = line.rstrip('\\').rstrip()
            words = line.split()
            if len(words) != 2:
                continue
            alias, alias_encoding = words
            # Lower-case locale
            name = alias.lower()
            # Normalize encoding, if given
            if '.' in name:
                lang, encoding = name.split('.')[:2]
                encoding = encoding.replace('-', '')
                encoding = encoding.replace('_', '')
                name = lang + '.' + encoding
            # Add an encoding to alias
            alias, _, modifier = alias.partition('@')
            alias = _locale._replace_encoding(alias, alias_encoding)
            if modifier and not (modifier == 'euro' and
                                 alias_encoding == 'ISO-8859-15'):
                alias += '@' + modifier
            data[name] = alias
    return data
85
def pprint(data):
    """Print the items of *data*, sorted, one ``key: value,`` line each.

    Keys and values are rendered with ``%a`` (ASCII repr); the ``key:``
    part is left-justified in a 40-character column.
    """
    for k, v in sorted(data.items()):
        print(' %-40s%a,' % ('%a:' % k, v))
Marc-André Lemburgbb4f1bd2004-12-10 21:58:14 +000090
def print_differences(data, olddata):
    """Print, as '#' comment lines, how *data* differs from *olddata*.

    The keys of *olddata* are visited in sorted order.  A key missing from
    *data* is reported as removed; a key whose value differs is reported
    as updated, showing the old and the new value.
    """
    for k in sorted(olddata):
        if k not in data:
            print('# removed %a' % k)
        elif olddata[k] != data[k]:
            print('# updated %a -> %a to %a' %
                  (k, olddata[k], data[k]))
    # Additions are not mentioned
Tim Peters5a9fb3c2005-01-07 16:01:32 +0000100
def optimize(data):
    """Return a copy of *data* without entries locale.normalize() can derive.

    ``locale.locale_alias`` is temporarily replaced by a copy of *data*.
    Each entry is then removed from that copy and put back only if
    ``locale.normalize(key)`` no longer yields the expected alias without
    it.  The reduced table is afterwards verified against the full *data*
    with ``check()``; if any mismatch is reported the process exits with
    status 1.

    The original ``locale.locale_alias`` is restored before returning,
    also when an exception is raised along the way.
    """
    saved_alias = locale.locale_alias
    locale.locale_alias = data.copy()
    try:
        for k, v in data.items():
            # Try without the entry; keep it only if it is really needed.
            del locale.locale_alias[k]
            if locale.normalize(k) != v:
                locale.locale_alias[k] = v
        newdata = locale.locale_alias
        errors = check(data)
    finally:
        # Never leave the patched table installed in the locale module.
        locale.locale_alias = saved_alias
    if errors:
        sys.exit(1)
    return newdata
114
def check(data):
    """Count the entries of *data* that locale.normalize() gets wrong.

    Check that all alias definitions from the X11 file are actually
    mapped to the correct alias locales: for every ``key -> alias`` pair
    ``locale.normalize(key)`` must equal ``alias``.  Each mismatch is
    reported on stderr.

    Returns the number of mismatches.
    """
    errors = 0
    for k, v in data.items():
        normalized = locale.normalize(k)
        if normalized != v:
            print('ERROR: %a -> %a != %a' % (k, normalized, v),
                  file=sys.stderr)
            errors += 1
    return errors
125
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--locale-alias', default=LOCALE_ALIAS,
                        help='location of the X11 alias file '
                             '(default: %a)' % LOCALE_ALIAS)
    parser.add_argument('--glibc-supported', default=SUPPORTED,
                        help='location of the glibc SUPPORTED locales file '
                             '(default: %a)' % SUPPORTED)
    args = parser.parse_args()

    # Start from the table currently shipped in locale.py, then overlay the
    # glibc data and finally the X11 data (later updates take precedence).
    data = locale.locale_alias.copy()
    data.update(parse_glibc_supported(args.glibc_supported))
    data.update(parse(args.locale_alias))
    while True:
        # Repeat optimization while the size is decreased.
        n = len(data)
        data = optimize(data)
        if len(data) == n:
            break
    print_differences(data, locale.locale_alias)
    print()
    print('locale_alias = {')
    pprint(data)
    print('}')