Benjamin Peterson | 90f5ba5 | 2010-03-11 22:53:45 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Marc-André Lemburg | bb4f1bd | 2004-12-10 21:58:14 +0000 | [diff] [blame] | 2 | """ |
| 3 | Convert the X11 locale.alias file into a mapping dictionary suitable |
| 4 | for locale.py. |
| 5 | |
| 6 | Written by Marc-Andre Lemburg <mal@genix.com>, 2004-12-10. |
| 7 | |
| 8 | """ |
| 9 | import locale |
Serhiy Storchaka | 8c4f57d | 2013-12-27 00:56:53 +0200 | [diff] [blame] | 10 | import sys |
Marc-André Lemburg | bb4f1bd | 2004-12-10 21:58:14 +0000 | [diff] [blame] | 11 | |
| 12 | # Location of the alias file |
Antoine Pitrou | 0c70d2d | 2010-04-11 22:35:34 +0000 | [diff] [blame] | 13 | LOCALE_ALIAS = '/usr/share/X11/locale/locale.alias' |
Marc-André Lemburg | bb4f1bd | 2004-12-10 21:58:14 +0000 | [diff] [blame] | 14 | |
def parse(filename):
    """Read an X11 locale.alias file and return a {locale name: alias} dict.

    Each non-blank, non-comment line must consist of exactly two
    whitespace-separated fields: the locale name (optionally followed by
    a trailing ':') and the alias it maps to.  Keys of the returned dict
    are lower-cased and have '-' and '_' removed from their encoding
    part; entries whose encoding is UTF-8 and one-letter names other
    than 'c' are left out.
    """
    with open(filename, encoding='latin1') as f:
        raw_lines = f.readlines()

    mapping = {}
    for raw in raw_lines:
        text = raw.strip()
        # Skip blank lines and comments
        if not text or text.startswith('#'):
            continue
        name, alias = text.split()

        # Fix non-standard locale names, e.g. ks_IN@devanagari.UTF-8:
        # move the encoding in front of the modifier.
        alias_lang, at_sign, alias_mod = alias.partition('@')
        if at_sign and '.' in alias_mod:
            alias_mod, _, alias_enc = alias_mod.partition('.')
            alias = '%s.%s@%s' % (alias_lang, alias_enc, alias_mod)

        # Strip the trailing ':' and lower-case the locale name
        if name.endswith(':'):
            name = name[:-1]
        name = name.lower()

        # Ignore one letter locale mappings (except for 'c')
        if len(name) == 1 and name != 'c':
            continue

        # Normalize encoding, if given
        if '.' in name:
            pieces = name.split('.')
            lang = pieces[0]
            encoding = pieces[1].replace('-', '').replace('_', '')
            name = lang + '.' + encoding
            if encoding.lower() == 'utf8':
                # Ignore UTF-8 mappings - this encoding should be
                # available for all locales
                continue

        mapping[name] = alias
    return mapping
| 53 | |
def pprint(data):
    """Print the entries of *data* as dict-literal lines, sorted by key.

    Each line holds the repr of the key followed by ':' (left-justified
    in a 40 character column), then the repr of the value and a comma.
    """
    for key in sorted(data):
        label = '%a:' % key
        print(' %-40s%a,' % (label, data[key]))
Marc-André Lemburg | bb4f1bd | 2004-12-10 21:58:14 +0000 | [diff] [blame] | 58 | |
def print_differences(data, olddata):
    """Print a comment line for every *olddata* entry changed in *data*.

    Keys of *olddata* are visited in sorted order.  A key missing from
    *data* is reported as removed; a key whose value differs is reported
    as updated.  Keys that exist only in *data* (additions) are not
    mentioned.
    """
    for name in sorted(olddata):
        if name not in data:
            print('# removed %a' % name)
            continue
        previous = olddata[name]
        current = data[name]
        if previous != current:
            print('# updated %a -> %a to %a' % (name, previous, current))
Tim Peters | 5a9fb3c | 2005-01-07 16:01:32 +0000 | [diff] [blame] | 68 | |
def optimize(data):
    """Return a copy of *data* without the entries normalize() can derive.

    locale.locale_alias is temporarily replaced by a scratch copy of
    *data*.  Each entry is removed from the scratch table and only put
    back if locale.normalize() can no longer map its key to the expected
    value without it.  The reduced table is then verified with check()
    against the full *data*.

    The original locale.locale_alias is restored before returning, and
    also when an exception is raised while the scratch table is
    installed.  If check() reports any errors the script exits with
    status 1.
    """
    saved_alias = locale.locale_alias
    locale.locale_alias = data.copy()
    try:
        for k, v in data.items():
            # Tentatively drop the entry; keep it only if normalize()
            # cannot reproduce the mapping from the remaining entries.
            del locale.locale_alias[k]
            if locale.normalize(k) != v:
                locale.locale_alias[k] = v
        newdata = locale.locale_alias
        # Validate while the reduced table is still installed.
        errors = check(data)
    finally:
        # Never leave the locale module with the scratch table in place.
        locale.locale_alias = saved_alias
    if errors:
        sys.exit(1)
    return newdata
| 82 | |
def check(data):
    """Return how many entries of *data* locale.normalize() gets wrong.

    Checks that all alias definitions from the X11 file are actually
    mapped to the correct alias locales.  Every key whose normalized
    form differs from its value is reported on stderr.
    """
    mismatches = 0
    for name, expected in data.items():
        actual = locale.normalize(name)
        if actual == expected:
            continue
        print('ERROR: %a -> %a != %a' % (name, actual, expected),
              file=sys.stderr)
        mismatches += 1
    return mismatches
| 93 | |
if __name__ == '__main__':
    # Start from the table currently shipped in locale.py and overlay
    # the mappings read from the X11 alias file.
    current = locale.locale_alias
    merged = dict(current)
    merged.update(parse(LOCALE_ALIAS))
    merged = optimize(merged)
    # Report removed/updated entries, then emit the new table as a
    # dict literal.
    print_differences(merged, current)
    print()
    print('locale_alias = {')
    pprint(merged)
    print('}')