blob: 7a03722a3a717f308fa9b440f463d7d35e9f6b21 [file] [log] [blame]
""" Locale support.

    The module provides low-level access to the C lib's locale APIs
    and adds high level number formatting APIs as well as a locale
    aliasing engine to complement these.

    The aliasing engine includes support for many commonly used locale
    names and maps them to values suitable for passing to the C lib's
    setlocale() function. It also includes default encodings for all
    supported locale names.

"""
13
Eric S. Raymondbe9b5072001-02-09 10:48:30 +000014import sys
Marc-André Lemburg5431bc32000-06-07 09:11:40 +000015
# Try importing the _locale module.
#
# If this fails, fall back on a basic 'C' locale emulation.

# Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
# trying the import.  So __all__ is also fiddled at the end of the file.
# Public names exported by "from locale import *".  LC_MESSAGES is not
# listed here because it only exists on some platforms.
__all__ = ["setlocale", "Error", "localeconv", "strcoll", "strxfrm",
           "format", "str", "atof", "atoi", "LC_CTYPE", "LC_COLLATE",
           "LC_TIME", "LC_MONETARY", "LC_NUMERIC", "LC_ALL", "CHAR_MAX",
           "normalize", "getdefaultlocale", "getlocale", "resetlocale"]
Skip Montanaro17ab1232001-01-24 06:27:27 +000025
try:

    from _locale import *

except ImportError:

    # Locale emulation: a pure-Python stand-in for the _locale
    # extension module that only knows the 'C' locale.

    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.
        """
        # 'C' locale default values
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation always returns 'C'.  It accepts None (query),
            '' (default locale, which can only be 'C' here) and 'C';
            any other value raises Error.
        """
        # Compare by value: an identity test against the literal 'C'
        # depends on string interning and may reject a valid request.
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'

    def strcoll(a, b):
        """ strcoll(string,string) -> int.
            Compares two strings according to the locale.

            Returns a negative, zero or positive number when a sorts
            before, equal to or after b.
        """
        # Spelled without cmp() so it works where cmp() is not a builtin.
        return (a > b) - (a < b)

    def strxfrm(s):
        """ strxfrm(string) -> string.
            Returns a string that behaves for cmp locale-aware.
        """
        # In the 'C' locale the transformation is the identity.
        return s
Marc-André Lemburg5431bc32000-06-07 09:11:40 +000088
### Number formatting APIs

# Author: Martin von Loewis
Guido van Rossumeef1d4e1997-11-19 19:01:43 +000092
93#perform the grouping from right to left
94def _group(s):
95 conv=localeconv()
96 grouping=conv['grouping']
97 if not grouping:return s
98 result=""
99 while s and grouping:
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000100 # if grouping is -1, we are done
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000101 if grouping[0]==CHAR_MAX:
102 break
103 # 0: re-use last group ad infinitum
104 elif grouping[0]!=0:
105 #process last group
106 group=grouping[0]
107 grouping=grouping[1:]
108 if result:
109 result=s[-group:]+conv['thousands_sep']+result
110 else:
111 result=s[-group:]
112 s=s[:-group]
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000113 if not result:
114 return s
115 if s:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000116 result=s+conv['thousands_sep']+result
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000117 return result
118
def format(f, val, grouping=0):
    """Formats a value in the same way that the % formatting would use,
    but takes the current locale into account.
    Grouping is applied if the third parameter is true.

    f is a %-style format string for a single number and val is the
    number.  Raises Error if the formatted text contains more than one
    '.'."""
    # Format the magnitude only; the sign is put back at the very end
    # so that it never takes part in digit grouping.
    result = f % abs(val)
    fields = result.split(".")
    if grouping:
        fields[0] = _group(fields[0])
    if len(fields) == 2:
        res = fields[0] + localeconv()['decimal_point'] + fields[1]
    elif len(fields) == 1:
        res = fields[0]
    else:
        raise Error("Too many decimal points in result string")

    if val < 0:
        return '-' + res
    else:
        return res
138
def str(val):
    """Convert float to string, taking the locale into account."""
    # Same "%.12g" format for every value, routed through the
    # locale-aware format().
    return format("%.12g", val)
142
def atof(str, func=float):
    """Parses a string as a float according to the locale settings.

    func is the conversion applied to the cleaned-up string; it
    defaults to float (atoi() passes int)."""
    conv = localeconv()
    # First, get rid of the grouping
    ts = conv['thousands_sep']
    if ts:
        str = str.replace(ts, '')
    # next, replace the decimal point with a dot
    dd = conv['decimal_point']
    if dd:
        str = str.replace(dd, '.')
    # finally, parse the string
    return func(str)
157
def atoi(str):
    "Converts a string to an integer according to the locale settings."
    # Same clean-up as atof(), but the result is built with int().
    return atof(str, int)
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000161
def _test():
    """ Test function: prints a grouped integer and a float together
        with the values parsed back from them.
    """
    setlocale(LC_ALL, "")
    # do grouping
    s1 = format("%d", 123456789, 1)
    print("%s is %s" % (s1, atoi(s1)))
    # standard formatting
    s1 = str(3.14)
    print("%s is %s" % (s1, atof(s1)))
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000170
### Locale name aliasing engine

# Author: Marc-Andre Lemburg, mal@lemburg.com
# Various tweaks by Fredrik Lundh <effbot@telia.com>
175
# store away the low-level version of setlocale (it's
# overridden below)
_setlocale = setlocale
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000179
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

    """
    # Normalize the locale name and extract the encoding
    fullname = localename.lower()
    if ':' in fullname:
        # ':' is sometimes used as encoding delimiter.
        fullname = fullname.replace(':', '.')
    if '.' in fullname:
        # anything after a second '.' is dropped
        langname, encoding = fullname.split('.')[:2]
        fullname = langname + '.' + encoding
    else:
        langname = fullname
        encoding = ''

    # First lookup: fullname (possibly with encoding)
    code = locale_alias.get(fullname)
    if code is not None:
        return code

    # Second try: langname (without encoding)
    code = locale_alias.get(langname)
    if code is None:
        # unknown name: hand it back untouched
        return localename

    if '.' in code:
        # Split on the first dot only so the unpack cannot fail.
        langname, defenc = code.split('.', 1)
    else:
        langname = code
        defenc = ''
    if encoding:
        # an explicitly given encoding wins over the table's default
        encoding = encoding_alias.get(encoding, encoding)
    else:
        encoding = defenc
    if encoding:
        return langname + '.' + encoding
    else:
        return langname
232
def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)
    if '.' in code:
        # Return a real tuple, as documented, not the list from split().
        return tuple(code.split('.')[:2])
    elif code == 'C':
        return None, None
    raise ValueError('unknown locale: %s' % localename)
255
256def _build_localename(localetuple):
257
258 """ Builds a locale code from the given tuple (language code,
259 encoding).
260
261 No aliasing or normalizing takes place.
262
263 """
264 language, encoding = localetuple
265 if language is None:
266 language = 'C'
267 if encoding is None:
268 return language
269 else:
270 return language + '.' + encoding
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000271
def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first found to be defined and non-empty
        will be used. envvars defaults to the search path used in GNU
        gettext; it must always contain the variable name 'LANG'.
        If LANGUAGE holds a colon-separated list, only its first entry
        is used.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        if sys.platform == "win32" and code and code[:2] == "0x":
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    lookup = os.environ.get
    for variable in envvars:
        localename = lookup(variable, None)
        # Skip variables that are unset or set to the empty string.
        if localename:
            if variable == 'LANGUAGE':
                # Keep the first entry only, so that normalize() does
                # not read the ':' as an encoding delimiter.
                localename = localename.split(':')[0]
            break
    else:
        localename = 'C'
    return _parse_localename(localename)
321
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000322
def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* values except LC_ALL. It
        defaults to LC_CTYPE.  A TypeError is raised for LC_ALL when
        the low-level setting contains a ';'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """
    # Calling the low-level setlocale without a value only queries.
    localename = _setlocale(category)
    if category == LC_ALL and ';' in localename:
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(localename)
340
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, a locale tuple (language code, encoding), or None.

        Locale tuples are converted to strings using the locale
        aliasing engine.  Locale strings are passed directly to the
        C lib.

        category may be given as one of the LC_* values.

        Returns whatever the low-level setlocale returns.

    """
    # type("") rather than the name str: this module rebinds str.
    if locale and type(locale) is not type(""):
        # convert to string
        locale = normalize(_build_localename(locale))
    return _setlocale(category, locale)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000356
def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        The default setting is determined by calling
        getdefaultlocale().  category defaults to LC_ALL.

    """
    default_name = _build_localename(getdefaultlocale())
    _setlocale(category, default_name)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000366
### Database
#
# The following data was extracted from the locale.alias file which
# comes with X11 and then hand edited removing the explicit encoding
# definitions and adding some more aliases. The file is usually
# available as /usr/lib/X11/locale/locale.alias.
#

#
# The encoding_alias table maps lowercase encoding alias names to C
# locale encoding names (case-sensitive).
#
# Keys are lowercase alias spellings; values are the encoding names
# that normalize() puts after the dot of a locale code.
encoding_alias = {
    '437': 'C',
    'c': 'C',
    'iso8859': 'ISO8859-1',
    '8859': 'ISO8859-1',
    '88591': 'ISO8859-1',
    'ascii': 'ISO8859-1',
    'en': 'ISO8859-1',
    'iso88591': 'ISO8859-1',
    'iso_8859-1': 'ISO8859-1',
    '885915': 'ISO8859-15',
    'iso885915': 'ISO8859-15',
    'iso_8859-15': 'ISO8859-15',
    'iso8859-2': 'ISO8859-2',
    'iso88592': 'ISO8859-2',
    'iso_8859-2': 'ISO8859-2',
    'iso88595': 'ISO8859-5',
    'iso88596': 'ISO8859-6',
    'iso88597': 'ISO8859-7',
    'iso88598': 'ISO8859-8',
    'iso88599': 'ISO8859-9',
    'iso-2022-jp': 'JIS7',
    'jis': 'JIS7',
    'jis7': 'JIS7',
    'sjis': 'SJIS',
    'tis620': 'TACTIS',
    'ajec': 'eucJP',
    'eucjp': 'eucJP',
    'ujis': 'eucJP',
    'utf-8': 'utf',
    'utf8': 'utf',
    'utf8@ucs4': 'utf',
}
412
#
# The locale_alias table maps lowercase alias names to C locale names
# (case-sensitive). Encodings are always separated from the locale
# name using a dot ('.'); they should only be given in case the
# language name is needed to interpret the given encoding alias
# correctly (CJK codes often have this need).
#
# Keys are lowercase alias names (optionally "name.encoding"); values
# are the locale codes returned by normalize().
locale_alias = {
    'american': 'en_US.ISO8859-1',
    'ar': 'ar_AA.ISO8859-6',
    'ar_aa': 'ar_AA.ISO8859-6',
    'ar_sa': 'ar_SA.ISO8859-6',
    'arabic': 'ar_AA.ISO8859-6',
    'bg': 'bg_BG.ISO8859-5',
    'bg_bg': 'bg_BG.ISO8859-5',
    'bulgarian': 'bg_BG.ISO8859-5',
    'c-french': 'fr_CA.ISO8859-1',
    'c': 'C',
    'c_c': 'C',
    'cextend': 'en_US.ISO8859-1',
    'chinese-s': 'zh_CN.eucCN',
    'chinese-t': 'zh_TW.eucTW',
    'croatian': 'hr_HR.ISO8859-2',
    'cs': 'cs_CZ.ISO8859-2',
    'cs_cs': 'cs_CZ.ISO8859-2',
    'cs_cz': 'cs_CZ.ISO8859-2',
    'cz': 'cz_CZ.ISO8859-2',
    'cz_cz': 'cz_CZ.ISO8859-2',
    # same target as 'cs', 'cs_cs' and 'cs_cz'
    'czech': 'cs_CZ.ISO8859-2',
    'da': 'da_DK.ISO8859-1',
    'da_dk': 'da_DK.ISO8859-1',
    'danish': 'da_DK.ISO8859-1',
    'de': 'de_DE.ISO8859-1',
    'de_at': 'de_AT.ISO8859-1',
    'de_ch': 'de_CH.ISO8859-1',
    'de_de': 'de_DE.ISO8859-1',
    'dutch': 'nl_BE.ISO8859-1',
    'ee': 'ee_EE.ISO8859-4',
    'el': 'el_GR.ISO8859-7',
    'el_gr': 'el_GR.ISO8859-7',
    'en': 'en_US.ISO8859-1',
    'en_au': 'en_AU.ISO8859-1',
    'en_ca': 'en_CA.ISO8859-1',
    'en_gb': 'en_GB.ISO8859-1',
    'en_ie': 'en_IE.ISO8859-1',
    'en_nz': 'en_NZ.ISO8859-1',
    'en_uk': 'en_GB.ISO8859-1',
    'en_us': 'en_US.ISO8859-1',
    'eng_gb': 'en_GB.ISO8859-1',
    'english': 'en_EN.ISO8859-1',
    'english_uk': 'en_GB.ISO8859-1',
    'english_united-states': 'en_US.ISO8859-1',
    'english_us': 'en_US.ISO8859-1',
    'es': 'es_ES.ISO8859-1',
    'es_ar': 'es_AR.ISO8859-1',
    'es_bo': 'es_BO.ISO8859-1',
    'es_cl': 'es_CL.ISO8859-1',
    'es_co': 'es_CO.ISO8859-1',
    'es_cr': 'es_CR.ISO8859-1',
    'es_ec': 'es_EC.ISO8859-1',
    'es_es': 'es_ES.ISO8859-1',
    'es_gt': 'es_GT.ISO8859-1',
    'es_mx': 'es_MX.ISO8859-1',
    'es_ni': 'es_NI.ISO8859-1',
    'es_pa': 'es_PA.ISO8859-1',
    'es_pe': 'es_PE.ISO8859-1',
    'es_py': 'es_PY.ISO8859-1',
    'es_sv': 'es_SV.ISO8859-1',
    'es_uy': 'es_UY.ISO8859-1',
    'es_ve': 'es_VE.ISO8859-1',
    'et': 'et_EE.ISO8859-4',
    'et_ee': 'et_EE.ISO8859-4',
    'fi': 'fi_FI.ISO8859-1',
    'fi_fi': 'fi_FI.ISO8859-1',
    'finnish': 'fi_FI.ISO8859-1',
    'fr': 'fr_FR.ISO8859-1',
    'fr_be': 'fr_BE.ISO8859-1',
    'fr_ca': 'fr_CA.ISO8859-1',
    'fr_ch': 'fr_CH.ISO8859-1',
    'fr_fr': 'fr_FR.ISO8859-1',
    'fre_fr': 'fr_FR.ISO8859-1',
    'french': 'fr_FR.ISO8859-1',
    'french_france': 'fr_FR.ISO8859-1',
    'ger_de': 'de_DE.ISO8859-1',
    'german': 'de_DE.ISO8859-1',
    'german_germany': 'de_DE.ISO8859-1',
    'greek': 'el_GR.ISO8859-7',
    'hebrew': 'iw_IL.ISO8859-8',
    'hr': 'hr_HR.ISO8859-2',
    'hr_hr': 'hr_HR.ISO8859-2',
    'hu': 'hu_HU.ISO8859-2',
    'hu_hu': 'hu_HU.ISO8859-2',
    'hungarian': 'hu_HU.ISO8859-2',
    'icelandic': 'is_IS.ISO8859-1',
    'id': 'id_ID.ISO8859-1',
    'id_id': 'id_ID.ISO8859-1',
    'is': 'is_IS.ISO8859-1',
    'is_is': 'is_IS.ISO8859-1',
    'iso-8859-1': 'en_US.ISO8859-1',
    'iso-8859-15': 'en_US.ISO8859-15',
    'iso8859-1': 'en_US.ISO8859-1',
    'iso8859-15': 'en_US.ISO8859-15',
    'iso_8859_1': 'en_US.ISO8859-1',
    'iso_8859_15': 'en_US.ISO8859-15',
    'it': 'it_IT.ISO8859-1',
    'it_ch': 'it_CH.ISO8859-1',
    'it_it': 'it_IT.ISO8859-1',
    'italian': 'it_IT.ISO8859-1',
    'iw': 'iw_IL.ISO8859-8',
    'iw_il': 'iw_IL.ISO8859-8',
    'ja': 'ja_JP.eucJP',
    'ja.jis': 'ja_JP.JIS7',
    'ja.sjis': 'ja_JP.SJIS',
    'ja_jp': 'ja_JP.eucJP',
    'ja_jp.ajec': 'ja_JP.eucJP',
    'ja_jp.euc': 'ja_JP.eucJP',
    'ja_jp.eucjp': 'ja_JP.eucJP',
    'ja_jp.iso-2022-jp': 'ja_JP.JIS7',
    'ja_jp.jis': 'ja_JP.JIS7',
    'ja_jp.jis7': 'ja_JP.JIS7',
    'ja_jp.mscode': 'ja_JP.SJIS',
    'ja_jp.sjis': 'ja_JP.SJIS',
    'ja_jp.ujis': 'ja_JP.eucJP',
    'japan': 'ja_JP.eucJP',
    'japanese': 'ja_JP.SJIS',
    'japanese-euc': 'ja_JP.eucJP',
    'japanese.euc': 'ja_JP.eucJP',
    'jp_jp': 'ja_JP.eucJP',
    'ko': 'ko_KR.eucKR',
    'ko_kr': 'ko_KR.eucKR',
    'ko_kr.euc': 'ko_KR.eucKR',
    'korean': 'ko_KR.eucKR',
    'lt': 'lt_LT.ISO8859-4',
    'lv': 'lv_LV.ISO8859-4',
    'mk': 'mk_MK.ISO8859-5',
    'mk_mk': 'mk_MK.ISO8859-5',
    'nl': 'nl_NL.ISO8859-1',
    'nl_be': 'nl_BE.ISO8859-1',
    'nl_nl': 'nl_NL.ISO8859-1',
    'no': 'no_NO.ISO8859-1',
    'no_no': 'no_NO.ISO8859-1',
    'norwegian': 'no_NO.ISO8859-1',
    'pl': 'pl_PL.ISO8859-2',
    'pl_pl': 'pl_PL.ISO8859-2',
    'polish': 'pl_PL.ISO8859-2',
    'portuguese': 'pt_PT.ISO8859-1',
    'portuguese_brazil': 'pt_BR.ISO8859-1',
    'posix': 'C',
    'posix-utf2': 'C',
    'pt': 'pt_PT.ISO8859-1',
    'pt_br': 'pt_BR.ISO8859-1',
    'pt_pt': 'pt_PT.ISO8859-1',
    'ro': 'ro_RO.ISO8859-2',
    'ro_ro': 'ro_RO.ISO8859-2',
    'ru': 'ru_RU.ISO8859-5',
    'ru_ru': 'ru_RU.ISO8859-5',
    'rumanian': 'ro_RO.ISO8859-2',
    'russian': 'ru_RU.ISO8859-5',
    'serbocroatian': 'sh_YU.ISO8859-2',
    'sh': 'sh_YU.ISO8859-2',
    'sh_hr': 'sh_HR.ISO8859-2',
    'sh_sp': 'sh_YU.ISO8859-2',
    'sh_yu': 'sh_YU.ISO8859-2',
    'sk': 'sk_SK.ISO8859-2',
    'sk_sk': 'sk_SK.ISO8859-2',
    'sl': 'sl_CS.ISO8859-2',
    'sl_cs': 'sl_CS.ISO8859-2',
    'sl_si': 'sl_SI.ISO8859-2',
    'slovak': 'sk_SK.ISO8859-2',
    'slovene': 'sl_CS.ISO8859-2',
    'sp': 'sp_YU.ISO8859-5',
    'sp_yu': 'sp_YU.ISO8859-5',
    'spanish': 'es_ES.ISO8859-1',
    'spanish_spain': 'es_ES.ISO8859-1',
    'sr_sp': 'sr_SP.ISO8859-2',
    'sv': 'sv_SE.ISO8859-1',
    'sv_se': 'sv_SE.ISO8859-1',
    'swedish': 'sv_SE.ISO8859-1',
    'th_th': 'th_TH.TACTIS',
    'tr': 'tr_TR.ISO8859-9',
    'tr_tr': 'tr_TR.ISO8859-9',
    'turkish': 'tr_TR.ISO8859-9',
    'univ': 'en_US.utf',
    'universal': 'en_US.utf',
    'zh': 'zh_CN.eucCN',
    'zh_cn': 'zh_CN.eucCN',
    'zh_cn.big5': 'zh_TW.eucTW',
    'zh_cn.euc': 'zh_CN.eucCN',
    'zh_tw': 'zh_TW.eucTW',
    'zh_tw.euc': 'zh_TW.eucTW',
}
604
#
# this maps windows language identifiers (as used on Windows 95 and
# earlier) to locale strings.
#
# NOTE: this mapping is incomplete.  If your language is missing, send
# a note with the missing language identifier and the suggested locale
# code to Fredrik Lundh <effbot@telia.com>.  Thanks /F
612
# Keys are numeric Windows language identifiers; values are
# language_TERRITORY codes.
windows_locale = {
    0x0404: "zh_TW", # Chinese (Taiwan)
    0x0804: "zh_CN", # Chinese (PRC)
    0x0406: "da_DK", # Danish
    0x0413: "nl_NL", # Dutch (Netherlands)
    0x0409: "en_US", # English (United States)
    0x0809: "en_GB", # English (United Kingdom); the territory code is GB
    0x0c09: "en_AU", # English (Australian)
    0x1009: "en_CA", # English (Canadian)
    0x1409: "en_NZ", # English (New Zealand)
    0x1809: "en_IE", # English (Ireland)
    0x1c09: "en_ZA", # English (South Africa)
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French (Standard)
    0x080c: "fr_BE", # French (Belgian)
    0x0c0c: "fr_CA", # French (Canadian)
    0x100c: "fr_CH", # French (Switzerland)
    0x0407: "de_DE", # German (Standard)
    0x0408: "el_GR", # Greek
    0x040d: "iw_IL", # Hebrew
    0x040f: "is_IS", # Icelandic
    0x0410: "it_IT", # Italian (Standard)
    0x0411: "ja_JP", # Japanese; the territory code is JP
    0x0414: "no_NO", # Norwegian (Bokmal)
    0x0816: "pt_PT", # Portuguese (Standard)
    0x0c0a: "es_ES", # Spanish (Modern Sort)
    0x0441: "sw_KE", # Swahili (Kenya)
    0x041d: "sv_SE", # Swedish
    0x081d: "sv_FI", # Swedish (Finland)
    0x041f: "tr_TR", # Turkish
}
644
def _print_categories(categories):

    """ Prints the language and encoding reported by getlocale() for
        every (name, category) pair in the categories dictionary.
    """
    for name, category in categories.items():
        print('%s ...' % name)
        lang, enc = getlocale(category)
        print(' Language:  %s' % (lang or '(undefined)'))
        print(' Encoding:  %s' % (enc or '(undefined)'))
        print('')

def _print_locale():

    """ Test function.

        Prints the default locale, then the per-category settings at
        startup, after resetlocale() and after setlocale(LC_ALL, "").
    """
    # Collect every LC_* constant of this module except LC_ALL, which
    # getlocale() does not support in general.
    categories = {}
    for k, v in globals().items():
        if k[:3] == 'LC_':
            categories[k] = v
    del categories['LC_ALL']

    print('Locale defaults as determined by getdefaultlocale():')
    print('-'*72)
    lang, enc = getdefaultlocale()
    print('Language:  %s' % (lang or '(undefined)'))
    print('Encoding:  %s' % (enc or '(undefined)'))
    print('')

    print('Locale settings on startup:')
    print('-'*72)
    _print_categories(categories)

    print('')
    print('Locale settings after calling resetlocale():')
    print('-'*72)
    resetlocale()
    _print_categories(categories)

    try:
        setlocale(LC_ALL, "")
    except Error:
        # Only a rejected locale is reported; anything else propagates.
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print('')
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-'*72)
        _print_categories(categories)
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000700
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000701###
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000702
# LC_MESSAGES is non-standard; export it only where it is defined.
try:
    LC_MESSAGES
except NameError:
    pass
else:
    __all__.append("LC_MESSAGES")
709
if __name__ == '__main__':
    # Run as a script: show the aliasing results, then the number
    # formatting demo.
    print('Locale aliasing:')
    print('')
    _print_locale()
    print('')
    print('Number formatting:')
    print('')
    _test()