blob: ac272e769efbcb18045ccc4b98a82129d94a1586 [file] [log] [blame]
Marc-André Lemburg5431bc32000-06-07 09:11:40 +00001""" Locale support.
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +00002
Marc-André Lemburg5431bc32000-06-07 09:11:40 +00003 The module provides low-level access to the C lib's locale APIs
4 and adds high level number formatting APIs as well as a locale
5 aliasing engine to complement these.
6
7 The aliasing engine includes support for many commonly used locale
8 names and maps them to values suitable for passing to the C lib's
9 setlocale() function. It also includes default encodings for all
10 supported locale names.
11
12"""
13
Fredrik Lundh6c86b992000-07-09 17:12:58 +000014import string, sys
Marc-André Lemburg5431bc32000-06-07 09:11:40 +000015
Fredrik Lundh6c86b992000-07-09 17:12:58 +000016# Try importing the _locale module.
17#
18# If this fails, fall back on a basic 'C' locale emulation.
19#
Guido van Rossumeef1d4e1997-11-19 19:01:43 +000020
try:

    from _locale import *

except ImportError:

    # Locale emulation: the C extension is not available, so define the
    # same names with the fixed behaviour of the portable 'C' locale.

    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.
        """
        # 'C' locale default values
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation only knows the 'C' locale: None (query), ''
            (user default) and 'C' are accepted and always yield 'C';
            any other value raises Error.
        """
        # Compare by value, not identity: an equal string that is a
        # different object must still be accepted.  '' is accepted too,
        # because 'C' is the only default this emulation can offer.
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'

    def strcoll(a, b):
        """ strcoll(string,string) -> int.
            Compares two strings according to the locale.
        """
        # Plain ordering is the 'C' collation; yields -1, 0 or 1.
        return (a > b) - (a < b)

    def strxfrm(s):
        """ strxfrm(string) -> string.
            Returns a string that behaves for cmp locale-aware.
        """
        # In the 'C' locale the string is already its own sort key.
        return s
Marc-André Lemburg5431bc32000-06-07 09:11:40 +000083
84### Number formatting APIs
85
86# Author: Martin von Loewis
Guido van Rossumeef1d4e1997-11-19 19:01:43 +000087
88#perform the grouping from right to left
def _group(s):
    """Insert the locale's thousands separator into the digit string s.

    The 'grouping' list from localeconv() is consumed from the right
    end of s: each entry is the size of the next group, CHAR_MAX stops
    grouping, and 0 repeats the previous group size for the rest of
    the string.  If the list is empty, or no group was formed, s is
    returned unchanged.

    Raises ValueError if the grouping list starts with 0, since there
    is no previous group size to repeat.
    """
    conv = localeconv()
    intervals = conv['grouping']
    if not intervals:
        return s
    groups = []
    size = None
    while s and intervals:
        head = intervals[0]
        # CHAR_MAX: no further grouping is to be performed
        if head == CHAR_MAX:
            break
        if head != 0:
            # a new group size: use it and advance in the list
            size = head
            intervals = intervals[1:]
        elif size is None:
            # 0 means "repeat the last size", but there is none yet
            raise ValueError('invalid grouping')
        # peel one group off the right end of the remaining digits
        groups.insert(0, s[-size:])
        s = s[:-size]
    if not groups:
        return s
    if s:
        # digits left over once grouping stopped form the leading group
        groups.insert(0, s)
    return conv['thousands_sep'].join(groups)
113
def format(f,val,grouping=0):
    """Format val with the %-style format f, honouring the locale.

    The magnitude of val is formatted first; the locale's decimal point
    replaces the '.', and the integer part is grouped when the third
    parameter is true.  A leading '-' is added for negative values.
    Raises Error if the formatted text holds more than one '.'.
    """
    # Work on the absolute value; the sign is re-attached at the end.
    pieces = (f % abs(val)).split(".")
    if grouping:
        pieces[0] = _group(pieces[0])
    count = len(pieces)
    if count > 2:
        raise Error("Too many decimal points in result string")
    if count == 2:
        text = pieces[0] + localeconv()['decimal_point'] + pieces[1]
    else:
        text = pieces[0]
    if val < 0:
        text = '-' + text
    return text
133
def str(val):
    """Convert float to string, taking the locale into account."""
    # "%.12g" keeps at most 12 significant digits; no grouping is applied.
    return format("%.12g", val)
137
def atof(str,func=float):
    """Parses a string as a float according to the locale settings.

    The locale's thousands separator is removed and its decimal point
    is replaced by '.', then the text is handed to func (float by
    default) and func's result is returned.
    """
    text = str
    conv = localeconv()
    #First, get rid of the grouping
    ts = conv['thousands_sep']
    if ts:
        text = text.replace(ts, '')
    #next, replace the decimal point with a dot
    dd = conv['decimal_point']
    if dd:
        text = text.replace(dd, '.')
    #finally, parse the string
    return func(text)
152
def atoi(str):
    "Converts a string to an integer according to the locale settings."
    # Same clean-up as atof(), but the cleaned text is parsed by int().
    return atof(str, int)
156
def _test():
    """Print a grouped integer and a float, each followed by the value
    obtained from parsing the printed text back.

    Switches LC_ALL to the user's default locale first.
    """
    setlocale(LC_ALL, "")
    #do grouping
    s1 = format("%d", 123456789, 1)
    # single-argument print(...) gives the same output with or without
    # the print statement
    print("%s is %s" % (s1, atoi(s1)))
    #standard formatting
    s1 = str(3.14)
    print("%s is %s" % (s1, atof(s1)))
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000165
166### Locale name aliasing engine
167
168# Author: Marc-Andre Lemburg, mal@lemburg.com
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000169# Various tweaks by Fredrik Lundh <effbot@telia.com>
170
# Keep a reference to the low-level setlocale() (imported from _locale
# or defined by the emulation above); the public name is rebound to a
# higher-level wrapper further down, which calls this one.
_setlocale = setlocale
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000174
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

    """
    # Normalize the locale name and extract the encoding.
    # ':' is sometimes used as encoding delimiter.
    fullname = localename.lower().replace(':', '.')
    if '.' in fullname:
        # anything after a second delimiter is dropped
        langname, encoding = fullname.split('.')[:2]
        fullname = langname + '.' + encoding
    else:
        langname = fullname
        encoding = ''

    # First lookup: fullname (possibly with encoding)
    code = locale_alias.get(fullname)
    if code is not None:
        return code

    # Second try: langname (without encoding)
    code = locale_alias.get(langname)
    if code is None:
        # unknown language: hand the caller's name back untouched
        return localename

    if '.' in code:
        langname, defenc = code.split('.')
    else:
        langname = code
        defenc = ''
    if encoding:
        # an explicit encoding wins; unknown aliases pass through as is
        encoding = encoding_alias.get(encoding, encoding)
    else:
        encoding = defenc
    if encoding:
        return langname + '.' + encoding
    return langname
227
def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)
    if '.' in code:
        # always hand back a real tuple, as documented
        return tuple(code.split('.')[:2])
    elif code == 'C':
        return None, None
    raise ValueError('unknown locale: %s' % localename)
250
def _build_localename(localetuple):

    """ Joins a (language code, encoding) tuple into a locale code.

        A language of None stands for 'C'; an encoding of None leaves
        the '.encoding' suffix out.  The parts are used as given: no
        aliasing or normalizing takes place.

    """
    lang, enc = localetuple
    name = lang
    if name is None:
        name = 'C'
    if enc is not None:
        name = name + '.' + enc
    return name
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000266
def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first found to be defined and non-empty
        will be used. envvars defaults to the search path used in GNU
        gettext; it must always contain the variable name 'LANG'.

        The variable 'LANGUAGE' may hold a colon-separated list of
        languages; only its first entry is used.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

        A ValueError is raised if the name that was found cannot be
        parsed.

    """

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        if sys.platform == "win32" and code and code[:2] == "0x":
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    lookup = os.environ.get
    for variable in envvars:
        localename = lookup(variable, None)
        if localename and variable == 'LANGUAGE':
            # normalize() would read ':' as an encoding delimiter, so
            # keep only the first language of the list
            localename = localename.split(':')[0]
        # a variable that is set but empty carries no information
        if localename:
            break
    else:
        localename = 'C'
    return _parse_localename(localename)
316
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000317
def getlocale(category=LC_CTYPE):

    """ Reports the locale currently active for a category as tuple
        (language code, encoding).

        category is one of the LC_* values and defaults to LC_CTYPE.
        For LC_ALL, a composite setting (one containing ';') cannot be
        expressed as a single tuple and raises TypeError.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """
    # Calling the low-level function without a value only queries.
    current = _setlocale(category)
    composite = category == LC_ALL and ';' in current
    if composite:
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(current)
335
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, a locale tuple (language code, encoding), or None.

        Locale tuples are converted to strings using the locale
        aliasing engine.  Locale strings are passed directly to the
        C lib.

        category may be given as one of the LC_* values.

        Returns whatever the low-level setlocale() returns.

    """
    # type("") is used instead of the name str, which this module
    # rebinds to its own function.  isinstance() also lets subclasses
    # of the string type through as strings.
    if locale and not isinstance(locale, type("")):
        # convert to string
        locale = normalize(_build_localename(locale))
    return _setlocale(category, locale)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000351
def resetlocale(category=LC_ALL):

    """ Switches category back to the default locale.

        The default is whatever getdefaultlocale() reports; it is
        turned into a locale string and handed to the low-level
        setlocale().  category defaults to LC_ALL.

    """
    default = getdefaultlocale()
    name = _build_localename(default)
    _setlocale(category, name)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000361
362### Database
363#
364# The following data was extracted from the locale.alias file which
365# comes with X11 and then hand edited removing the explicit encoding
366# definitions and adding some more aliases. The file is usually
367# available as /usr/lib/X11/locale/locale.alias.
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000368#
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000369
370#
# The encoding_alias table maps lowercase encoding alias names to C
# locale encoding names (case-sensitive).
#
# normalize() consults it only for an encoding the caller spelled out
# explicitly; an alias that is not listed here is passed through
# unchanged.
#
encoding_alias = {
    '437': 'C',
    'c': 'C',
    # ISO 8859 family
    'iso8859': 'ISO8859-1',
    '8859': 'ISO8859-1',
    '88591': 'ISO8859-1',
    'ascii': 'ISO8859-1',
    'en': 'ISO8859-1',
    'iso88591': 'ISO8859-1',
    'iso_8859-1': 'ISO8859-1',
    '885915': 'ISO8859-15',
    'iso885915': 'ISO8859-15',
    'iso_8859-15': 'ISO8859-15',
    'iso8859-2': 'ISO8859-2',
    'iso88592': 'ISO8859-2',
    'iso_8859-2': 'ISO8859-2',
    'iso88595': 'ISO8859-5',
    'iso88596': 'ISO8859-6',
    'iso88597': 'ISO8859-7',
    'iso88598': 'ISO8859-8',
    'iso88599': 'ISO8859-9',
    # Japanese and Thai encodings
    'iso-2022-jp': 'JIS7',
    'jis': 'JIS7',
    'jis7': 'JIS7',
    'sjis': 'SJIS',
    'tis620': 'TACTIS',
    'ajec': 'eucJP',
    'eucjp': 'eucJP',
    'ujis': 'eucJP',
    # UTF-8 spellings
    'utf-8': 'utf',
    'utf8': 'utf',
    'utf8@ucs4': 'utf',
}
407
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000408#
# The locale_alias table maps lowercase alias names to C locale names
# (case-sensitive). Encodings are always separated from the locale
# name using a dot ('.'); they should only be given in case the
# language name is needed to interpret the given encoding alias
# correctly (CJK codes often have this need).
#
# normalize() looks a name up here twice: first together with its
# encoding, then by the language part alone.
#
locale_alias = {
    'american': 'en_US.ISO8859-1',
    'ar': 'ar_AA.ISO8859-6',
    'ar_aa': 'ar_AA.ISO8859-6',
    'ar_sa': 'ar_SA.ISO8859-6',
    'arabic': 'ar_AA.ISO8859-6',
    'bg': 'bg_BG.ISO8859-5',
    'bg_bg': 'bg_BG.ISO8859-5',
    'bulgarian': 'bg_BG.ISO8859-5',
    'c-french': 'fr_CA.ISO8859-1',
    'c': 'C',
    'c_c': 'C',
    'cextend': 'en_US.ISO8859-1',
    'chinese-s': 'zh_CN.eucCN',
    'chinese-t': 'zh_TW.eucTW',
    'croatian': 'hr_HR.ISO8859-2',
    'cs': 'cs_CZ.ISO8859-2',
    'cs_cs': 'cs_CZ.ISO8859-2',
    'cs_cz': 'cs_CZ.ISO8859-2',
    'cz': 'cz_CZ.ISO8859-2',
    'cz_cz': 'cz_CZ.ISO8859-2',
    # same target as 'cs', 'cs_cs' and 'cs_cz' above
    'czech': 'cs_CZ.ISO8859-2',
    'da': 'da_DK.ISO8859-1',
    'da_dk': 'da_DK.ISO8859-1',
    'danish': 'da_DK.ISO8859-1',
    'de': 'de_DE.ISO8859-1',
    'de_at': 'de_AT.ISO8859-1',
    'de_ch': 'de_CH.ISO8859-1',
    'de_de': 'de_DE.ISO8859-1',
    'dutch': 'nl_BE.ISO8859-1',
    'ee': 'ee_EE.ISO8859-4',
    'el': 'el_GR.ISO8859-7',
    'el_gr': 'el_GR.ISO8859-7',
    'en': 'en_US.ISO8859-1',
    'en_au': 'en_AU.ISO8859-1',
    'en_ca': 'en_CA.ISO8859-1',
    'en_gb': 'en_GB.ISO8859-1',
    'en_ie': 'en_IE.ISO8859-1',
    'en_nz': 'en_NZ.ISO8859-1',
    'en_uk': 'en_GB.ISO8859-1',
    'en_us': 'en_US.ISO8859-1',
    'eng_gb': 'en_GB.ISO8859-1',
    'english': 'en_EN.ISO8859-1',
    'english_uk': 'en_GB.ISO8859-1',
    'english_united-states': 'en_US.ISO8859-1',
    'english_us': 'en_US.ISO8859-1',
    'es': 'es_ES.ISO8859-1',
    'es_ar': 'es_AR.ISO8859-1',
    'es_bo': 'es_BO.ISO8859-1',
    'es_cl': 'es_CL.ISO8859-1',
    'es_co': 'es_CO.ISO8859-1',
    'es_cr': 'es_CR.ISO8859-1',
    'es_ec': 'es_EC.ISO8859-1',
    'es_es': 'es_ES.ISO8859-1',
    'es_gt': 'es_GT.ISO8859-1',
    'es_mx': 'es_MX.ISO8859-1',
    'es_ni': 'es_NI.ISO8859-1',
    'es_pa': 'es_PA.ISO8859-1',
    'es_pe': 'es_PE.ISO8859-1',
    'es_py': 'es_PY.ISO8859-1',
    'es_sv': 'es_SV.ISO8859-1',
    'es_uy': 'es_UY.ISO8859-1',
    'es_ve': 'es_VE.ISO8859-1',
    'et': 'et_EE.ISO8859-4',
    'et_ee': 'et_EE.ISO8859-4',
    'fi': 'fi_FI.ISO8859-1',
    'fi_fi': 'fi_FI.ISO8859-1',
    'finnish': 'fi_FI.ISO8859-1',
    'fr': 'fr_FR.ISO8859-1',
    'fr_be': 'fr_BE.ISO8859-1',
    'fr_ca': 'fr_CA.ISO8859-1',
    'fr_ch': 'fr_CH.ISO8859-1',
    'fr_fr': 'fr_FR.ISO8859-1',
    'fre_fr': 'fr_FR.ISO8859-1',
    'french': 'fr_FR.ISO8859-1',
    'french_france': 'fr_FR.ISO8859-1',
    'ger_de': 'de_DE.ISO8859-1',
    'german': 'de_DE.ISO8859-1',
    'german_germany': 'de_DE.ISO8859-1',
    'greek': 'el_GR.ISO8859-7',
    'hebrew': 'iw_IL.ISO8859-8',
    'hr': 'hr_HR.ISO8859-2',
    'hr_hr': 'hr_HR.ISO8859-2',
    'hu': 'hu_HU.ISO8859-2',
    'hu_hu': 'hu_HU.ISO8859-2',
    'hungarian': 'hu_HU.ISO8859-2',
    'icelandic': 'is_IS.ISO8859-1',
    'id': 'id_ID.ISO8859-1',
    'id_id': 'id_ID.ISO8859-1',
    'is': 'is_IS.ISO8859-1',
    'is_is': 'is_IS.ISO8859-1',
    'iso-8859-1': 'en_US.ISO8859-1',
    'iso-8859-15': 'en_US.ISO8859-15',
    'iso8859-1': 'en_US.ISO8859-1',
    'iso8859-15': 'en_US.ISO8859-15',
    'iso_8859_1': 'en_US.ISO8859-1',
    'iso_8859_15': 'en_US.ISO8859-15',
    'it': 'it_IT.ISO8859-1',
    'it_ch': 'it_CH.ISO8859-1',
    'it_it': 'it_IT.ISO8859-1',
    'italian': 'it_IT.ISO8859-1',
    'iw': 'iw_IL.ISO8859-8',
    'iw_il': 'iw_IL.ISO8859-8',
    'ja': 'ja_JP.eucJP',
    'ja.jis': 'ja_JP.JIS7',
    'ja.sjis': 'ja_JP.SJIS',
    'ja_jp': 'ja_JP.eucJP',
    'ja_jp.ajec': 'ja_JP.eucJP',
    'ja_jp.euc': 'ja_JP.eucJP',
    'ja_jp.eucjp': 'ja_JP.eucJP',
    'ja_jp.iso-2022-jp': 'ja_JP.JIS7',
    'ja_jp.jis': 'ja_JP.JIS7',
    'ja_jp.jis7': 'ja_JP.JIS7',
    'ja_jp.mscode': 'ja_JP.SJIS',
    'ja_jp.sjis': 'ja_JP.SJIS',
    'ja_jp.ujis': 'ja_JP.eucJP',
    'japan': 'ja_JP.eucJP',
    'japanese': 'ja_JP.SJIS',
    'japanese-euc': 'ja_JP.eucJP',
    'japanese.euc': 'ja_JP.eucJP',
    'jp_jp': 'ja_JP.eucJP',
    'ko': 'ko_KR.eucKR',
    'ko_kr': 'ko_KR.eucKR',
    'ko_kr.euc': 'ko_KR.eucKR',
    'korean': 'ko_KR.eucKR',
    'lt': 'lt_LT.ISO8859-4',
    'lv': 'lv_LV.ISO8859-4',
    'mk': 'mk_MK.ISO8859-5',
    'mk_mk': 'mk_MK.ISO8859-5',
    'nl': 'nl_NL.ISO8859-1',
    'nl_be': 'nl_BE.ISO8859-1',
    'nl_nl': 'nl_NL.ISO8859-1',
    'no': 'no_NO.ISO8859-1',
    'no_no': 'no_NO.ISO8859-1',
    'norwegian': 'no_NO.ISO8859-1',
    'pl': 'pl_PL.ISO8859-2',
    'pl_pl': 'pl_PL.ISO8859-2',
    'polish': 'pl_PL.ISO8859-2',
    'portuguese': 'pt_PT.ISO8859-1',
    'portuguese_brazil': 'pt_BR.ISO8859-1',
    'posix': 'C',
    'posix-utf2': 'C',
    'pt': 'pt_PT.ISO8859-1',
    'pt_br': 'pt_BR.ISO8859-1',
    'pt_pt': 'pt_PT.ISO8859-1',
    'ro': 'ro_RO.ISO8859-2',
    'ro_ro': 'ro_RO.ISO8859-2',
    'ru': 'ru_RU.ISO8859-5',
    'ru_ru': 'ru_RU.ISO8859-5',
    'rumanian': 'ro_RO.ISO8859-2',
    'russian': 'ru_RU.ISO8859-5',
    'serbocroatian': 'sh_YU.ISO8859-2',
    'sh': 'sh_YU.ISO8859-2',
    'sh_hr': 'sh_HR.ISO8859-2',
    'sh_sp': 'sh_YU.ISO8859-2',
    'sh_yu': 'sh_YU.ISO8859-2',
    'sk': 'sk_SK.ISO8859-2',
    'sk_sk': 'sk_SK.ISO8859-2',
    'sl': 'sl_CS.ISO8859-2',
    'sl_cs': 'sl_CS.ISO8859-2',
    'sl_si': 'sl_SI.ISO8859-2',
    'slovak': 'sk_SK.ISO8859-2',
    'slovene': 'sl_CS.ISO8859-2',
    'sp': 'sp_YU.ISO8859-5',
    'sp_yu': 'sp_YU.ISO8859-5',
    'spanish': 'es_ES.ISO8859-1',
    'spanish_spain': 'es_ES.ISO8859-1',
    'sr_sp': 'sr_SP.ISO8859-2',
    'sv': 'sv_SE.ISO8859-1',
    'sv_se': 'sv_SE.ISO8859-1',
    'swedish': 'sv_SE.ISO8859-1',
    'th_th': 'th_TH.TACTIS',
    'tr': 'tr_TR.ISO8859-9',
    'tr_tr': 'tr_TR.ISO8859-9',
    'turkish': 'tr_TR.ISO8859-9',
    'univ': 'en_US.utf',
    'universal': 'en_US.utf',
    'zh': 'zh_CN.eucCN',
    'zh_cn': 'zh_CN.eucCN',
    'zh_cn.big5': 'zh_TW.eucTW',
    'zh_cn.euc': 'zh_CN.eucCN',
    'zh_tw': 'zh_TW.eucTW',
    'zh_tw.euc': 'zh_TW.eucTW',
}
599
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000600#
# this maps windows language identifiers (as used on Windows 95 and
# earlier) to locale strings.
#
# getdefaultlocale() uses it on win32 when the platform reports the
# language as a hexadecimal identifier ("0x....").
#
# NOTE: this mapping is incomplete.  If your language is missing, send
# a note with the missing language identifier and the suggested locale
# code to Fredrik Lundh <effbot@telia.com>.  Thanks /F

windows_locale = {
    0x0404: "zh_TW", # Chinese (Taiwan)
    0x0804: "zh_CN", # Chinese (PRC)
    0x0406: "da_DK", # Danish
    0x0413: "nl_NL", # Dutch (Netherlands)
    0x0409: "en_US", # English (United States)
    0x0809: "en_GB", # English (United Kingdom); country code is GB
    0x0c09: "en_AU", # English (Australian)
    0x1009: "en_CA", # English (Canadian)
    0x1409: "en_NZ", # English (New Zealand)
    0x1809: "en_IE", # English (Ireland)
    0x1c09: "en_ZA", # English (South Africa)
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French (Standard)
    0x080c: "fr_BE", # French (Belgian)
    0x0c0c: "fr_CA", # French (Canadian)
    0x100c: "fr_CH", # French (Switzerland)
    0x0407: "de_DE", # German (Standard)
    0x0408: "el_GR", # Greek
    0x040d: "iw_IL", # Hebrew
    0x040f: "is_IS", # Icelandic
    0x0410: "it_IT", # Italian (Standard)
    0x0411: "ja_JP", # Japanese; country code is JP
    0x0414: "no_NO", # Norwegian (Bokmal)
    0x0816: "pt_PT", # Portuguese (Standard)
    0x0c0a: "es_ES", # Spanish (Modern Sort)
    0x0441: "sw_KE", # Swahili (Kenya)
    0x041d: "sv_SE", # Swedish
    0x081d: "sv_FI", # Swedish (Finland)
    0x041f: "tr_TR", # Turkish
}
639
def _print_locale():

    """ Test function.

        Prints the default locale, then the setting of every LC_*
        category except LC_ALL three times: as found on entry, after
        resetlocale(), and after setlocale(LC_ALL, "") if that call
        succeeds.
    """
    # Collect the LC_* category constants defined in this module.
    # LC_ALL is left out of the per-category report.
    categories = {}
    for k, v in globals().items():
        if k[:3] == 'LC_' and k != 'LC_ALL':
            categories[k] = v

    def _report(categories):
        # one "name ... / Language / Encoding" paragraph per category
        for name, category in categories.items():
            print('%s %s' % (name, '...'))
            lang, enc = getlocale(category)
            print('%s %s' % (' Language: ', lang or '(undefined)'))
            print('%s %s' % (' Encoding: ', enc or '(undefined)'))
            print('')

    print('Locale defaults as determined by getdefaultlocale():')
    print('-'*72)
    lang, enc = getdefaultlocale()
    print('%s %s' % ('Language: ', lang or '(undefined)'))
    print('%s %s' % ('Encoding: ', enc or '(undefined)'))
    print('')

    print('Locale settings on startup:')
    print('-'*72)
    _report(categories)

    print('')
    print('Locale settings after calling resetlocale():')
    print('-'*72)
    resetlocale()
    _report(categories)

    try:
        setlocale(LC_ALL, "")
    except Error:
        # only a rejected locale is expected here; anything else
        # (including KeyboardInterrupt) should propagate
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print('')
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-'*72)
        _report(categories)
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000695
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000696###
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000697
if __name__=='__main__':
    # Self-test when run as a script: first the aliasing engine, then
    # the number formatting helpers.  print('') emits the same blank
    # line as a bare print statement.
    print('Locale aliasing:')
    print('')
    _print_locale()
    print('')
    print('Number formatting:')
    print('')
    _test()
705 _test()