""" Locale support.

    The module provides low-level access to the C lib's locale APIs
    and adds high level number formatting APIs as well as a locale
    aliasing engine to complement these.

    The aliasing engine includes support for many commonly used locale
    names and maps them to values suitable for passing to the C lib's
    setlocale() function. It also includes default encodings for all
    supported locale names.

"""

import string
import sys

# Try importing the _locale module.
#
# If this fails, fall back on a basic 'C' locale emulation.
#

try:

    from _locale import *

except ImportError:

    # Locale emulation: only the portable 'C' locale is available.

    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.
        """
        # 'C' locale default values; 127 is CHAR_MAX, i.e. "not set"
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation accepts None (query), '' (default locale)
            and 'C'; anything else raises Error.  The result is
            always 'C'.
        """
        # Compare by value: an identity test against a string literal
        # rejects equal strings that happen to be distinct objects.
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'

    def strcoll(a, b):
        """ strcoll(string,string) -> int.
            Compares two strings according to the locale.
        """
        # Plain ordering; same result as cmp(a, b) without needing
        # the cmp() builtin.
        return (a > b) - (a < b)

    def strxfrm(s):
        """ strxfrm(string) -> string.
            Returns a string that behaves for cmp locale-aware.
        """
        return s

### Number formatting APIs

# Author: Martin von Loewis

# Perform the grouping from right to left.
def _group(s, conv=None):
    """ Inserts the locale's thousands separator into the digit
        string s, working from right to left.

        conv is the convention dictionary to use (keys 'grouping'
        and 'thousands_sep'); it defaults to localeconv().

        Any leading non-digit characters (sign, padding) are kept in
        front of the grouped digits and are never grouped themselves.
        If no grouping applies, s is returned unchanged.
    """
    if conv is None:
        conv = localeconv()
    grouping = conv['grouping']
    if not grouping:
        return s
    sep = conv['thousands_sep']

    # Split off a leading sign / padding so that only digits are grouped
    start = 0
    while start < len(s) and s[start] not in '0123456789':
        start = start + 1
    prefix = s[:start]
    digits = s[start:]

    groups = []
    group = 0
    while digits and grouping:
        # CHAR_MAX: no further grouping is to be performed
        if grouping[0] == CHAR_MAX:
            break
        # 0: re-use the last group size ad infinitum
        elif grouping[0] != 0:
            group = grouping[0]
            grouping = grouping[1:]
        if group <= 0:
            # a leading 0 entry gives no group size to repeat
            break
        groups.insert(0, digits[-group:])
        digits = digits[:-group]

    if not groups:
        return s
    if digits:
        # whatever is left over forms the leftmost group
        groups.insert(0, digits)
    return prefix + sep.join(groups)

def format(f, val, grouping=0):
    """ Formats a value in the same way that the % formatting would
        use, but takes the current locale into account.

        f is a %-format string and val the value (or tuple) applied
        to it.  Grouping is applied to the part in front of the
        decimal point if the third parameter is true.

        Raises Error if the formatted result contains more than one
        '.' character.
    """
    result = f % val
    fields = result.split(".")
    if grouping:
        fields[0] = _group(fields[0])
    if len(fields) == 2:
        # swap the '.' for the locale's decimal point
        return fields[0] + localeconv()['decimal_point'] + fields[1]
    elif len(fields) == 1:
        return fields[0]
    else:
        raise Error("Too many decimal points in result string")

def str(val):
    """ Converts a float to a string, taking the locale into
        account.  Uses the '%.12g' format.
    """
    return format("%.12g", val)

def atof(str, func=float):
    """ Parses a string as a float according to the locale settings.

        func is the conversion applied to the de-localized string;
        it defaults to float (equivalent to the deprecated
        string.atof for string input).
    """
    conv = localeconv()
    text = str
    # First, get rid of the grouping
    ts = conv['thousands_sep']
    if ts:
        text = text.replace(ts, "")
    # next, replace the decimal point with a dot
    dd = conv['decimal_point']
    if dd:
        text = text.replace(dd, '.')
    # finally, parse the string
    return func(text)

def atoi(str):
    """ Converts a string to an integer according to the locale
        settings.  The string is de-localized by atof() and then
        converted with int().
    """
    return atof(str, int)

def _test():
    """ Test function: switches to the default locale and prints a
        grouped integer and a float together with their parsed-back
        values.
    """
    setlocale(LC_ALL, "")
    # do grouping
    s1 = format("%d", 123456789, 1)
    print('%s is %s' % (s1, atoi(s1)))
    # standard formatting
    s1 = str(3.14)
    print('%s is %s' % (s1, atof(s1)))

### Locale name aliasing engine

# Author: Marc-Andre Lemburg, mal@lemburg.com
# Various tweaks by Fredrik Lundh <effbot@telia.com>

# Store away the low-level version of setlocale (it's
# overridden below).
_setlocale = setlocale

def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

    """
    # Normalize the locale name and extract the encoding
    fullname = localename.lower()
    if ':' in fullname:
        # ':' is sometimes used as encoding delimiter.
        fullname = fullname.replace(':', '.')
    if '.' in fullname:
        # anything after a second '.' is ignored
        langname, encoding = fullname.split('.')[:2]
        fullname = langname + '.' + encoding
    else:
        langname = fullname
        encoding = ''

    # First lookup: fullname (possibly with encoding)
    code = locale_alias.get(fullname, None)
    if code is not None:
        return code

    # Second try: langname (without encoding)
    code = locale_alias.get(langname, None)
    if code is None:
        return localename

    if '.' in code:
        langname, defenc = code.split('.')[:2]
    else:
        langname = code
        defenc = ''
    if encoding:
        # map the requested encoding to its C lib name, if known
        encoding = encoding_alias.get(encoding, encoding)
    else:
        encoding = defenc
    if encoding:
        return langname + '.' + encoding
    else:
        return langname

def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)
    if '.' in code:
        # return a real tuple, as documented
        return tuple(code.split('.')[:2])
    elif code == 'C':
        return None, None
    else:
        raise ValueError('unknown locale: %s' % localename)

def _build_localename(localetuple):

    """ Builds a locale code from the given tuple (language code,
        encoding).

        A language code of None is replaced by 'C'; an encoding of
        None yields the bare language code.

        No aliasing or normalizing takes place.

    """
    language, encoding = localetuple
    if language is None:
        language = 'C'
    if encoding is None:
        return language
    return language + '.' + encoding

def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first found to be defined and
        non-empty will be used; if none is, the 'C' locale is
        assumed. envvars defaults to the search path used in GNU
        gettext; it must always contain the variable name 'LANG'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

        A ValueError is raised if the locale name found in the
        environment cannot be parsed.

    """

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        if sys.platform == "win32" and code and code[:2] == "0x":
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    lookup = os.environ.get
    for variable in envvars:
        localename = lookup(variable, None)
        # an empty value counts as "not set"; using it would only
        # produce an 'unknown locale' error
        if localename:
            break
    else:
        localename = 'C'
    return _parse_localename(localename)

def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* value except LC_ALL. It
        defaults to LC_CTYPE.  A TypeError is raised for LC_ALL when
        the current setting is a composite (';'-separated) string.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """
    # querying (no second argument) leaves the setting untouched
    localename = _setlocale(category)
    if category == LC_ALL and ';' in localename:
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(localename)

def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, a locale tuple (language code, encoding), or None.

        Locale tuples are converted to strings using the locale
        aliasing engine.  Locale strings are passed directly to the
        C lib.  None (or any other false value) is passed through
        unchanged.

        category may be given as one of the LC_* values.

        Returns the result of the low-level setlocale() call.

    """
    # type("") rather than the name str: this module rebinds str
    if locale and not isinstance(locale, type("")):
        # convert to string
        locale = normalize(_build_localename(locale))
    return _setlocale(category, locale)

def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        The default setting is determined by calling
        getdefaultlocale().  category defaults to LC_ALL.

    """
    default = _build_localename(getdefaultlocale())
    _setlocale(category, default)

### Database
#
# The following data was extracted from the locale.alias file which
# comes with X11 and then hand edited removing the explicit encoding
# definitions and adding some more aliases. The file is usually
# available as /usr/lib/X11/locale/locale.alias.
#

#
# The encoding_alias table maps lowercase encoding alias names to C
# locale encoding names (case-sensitive).
#
# Keys are lowercase encoding aliases; values are the encoding names
# used in the locale codes of the locale_alias table.
encoding_alias = {
    '437': 'C',
    'c': 'C',
    'iso8859': 'ISO8859-1',
    '8859': 'ISO8859-1',
    '88591': 'ISO8859-1',
    'ascii': 'ISO8859-1',
    'en': 'ISO8859-1',
    'iso88591': 'ISO8859-1',
    'iso_8859-1': 'ISO8859-1',
    '885915': 'ISO8859-15',
    'iso885915': 'ISO8859-15',
    'iso_8859-15': 'ISO8859-15',
    'iso8859-2': 'ISO8859-2',
    'iso88592': 'ISO8859-2',
    'iso_8859-2': 'ISO8859-2',
    'iso88595': 'ISO8859-5',
    'iso88596': 'ISO8859-6',
    'iso88597': 'ISO8859-7',
    'iso88598': 'ISO8859-8',
    'iso88599': 'ISO8859-9',
    'iso-2022-jp': 'JIS7',
    'jis': 'JIS7',
    'jis7': 'JIS7',
    'sjis': 'SJIS',
    'tis620': 'TACTIS',
    'ajec': 'eucJP',
    'eucjp': 'eucJP',
    'ujis': 'eucJP',
    'utf-8': 'utf',
    'utf8': 'utf',
    'utf8@ucs4': 'utf',
}

#
# The locale_alias table maps lowercase alias names to C locale names
# (case-sensitive). Encodings are always separated from the locale
# name using a dot ('.'); they should only be given in case the
# language name is needed to interpret the given encoding alias
# correctly (CJK codes often have this need).
#
# Keys are lowercase locale aliases (optionally with '.encoding');
# values are the locale codes returned by normalize().
locale_alias = {
    'american': 'en_US.ISO8859-1',
    'ar': 'ar_AA.ISO8859-6',
    'ar_aa': 'ar_AA.ISO8859-6',
    'ar_sa': 'ar_SA.ISO8859-6',
    'arabic': 'ar_AA.ISO8859-6',
    'bg': 'bg_BG.ISO8859-5',
    'bg_bg': 'bg_BG.ISO8859-5',
    'bulgarian': 'bg_BG.ISO8859-5',
    'c-french': 'fr_CA.ISO8859-1',
    'c': 'C',
    'c_c': 'C',
    'cextend': 'en_US.ISO8859-1',
    'chinese-s': 'zh_CN.eucCN',
    'chinese-t': 'zh_TW.eucTW',
    'croatian': 'hr_HR.ISO8859-2',
    'cs': 'cs_CZ.ISO8859-2',
    'cs_cs': 'cs_CZ.ISO8859-2',
    'cs_cz': 'cs_CZ.ISO8859-2',
    'cz': 'cz_CZ.ISO8859-2',
    'cz_cz': 'cz_CZ.ISO8859-2',
    'czech': 'cs_CS.ISO8859-2',
    'da': 'da_DK.ISO8859-1',
    'da_dk': 'da_DK.ISO8859-1',
    'danish': 'da_DK.ISO8859-1',
    'de': 'de_DE.ISO8859-1',
    'de_at': 'de_AT.ISO8859-1',
    'de_ch': 'de_CH.ISO8859-1',
    'de_de': 'de_DE.ISO8859-1',
    'dutch': 'nl_BE.ISO8859-1',
    'ee': 'ee_EE.ISO8859-4',
    'el': 'el_GR.ISO8859-7',
    'el_gr': 'el_GR.ISO8859-7',
    'en': 'en_US.ISO8859-1',
    'en_au': 'en_AU.ISO8859-1',
    'en_ca': 'en_CA.ISO8859-1',
    'en_gb': 'en_GB.ISO8859-1',
    'en_ie': 'en_IE.ISO8859-1',
    'en_nz': 'en_NZ.ISO8859-1',
    'en_uk': 'en_GB.ISO8859-1',
    'en_us': 'en_US.ISO8859-1',
    'eng_gb': 'en_GB.ISO8859-1',
    'english': 'en_EN.ISO8859-1',
    'english_uk': 'en_GB.ISO8859-1',
    'english_united-states': 'en_US.ISO8859-1',
    'english_us': 'en_US.ISO8859-1',
    'es': 'es_ES.ISO8859-1',
    'es_ar': 'es_AR.ISO8859-1',
    'es_bo': 'es_BO.ISO8859-1',
    'es_cl': 'es_CL.ISO8859-1',
    'es_co': 'es_CO.ISO8859-1',
    'es_cr': 'es_CR.ISO8859-1',
    'es_ec': 'es_EC.ISO8859-1',
    'es_es': 'es_ES.ISO8859-1',
    'es_gt': 'es_GT.ISO8859-1',
    'es_mx': 'es_MX.ISO8859-1',
    'es_ni': 'es_NI.ISO8859-1',
    'es_pa': 'es_PA.ISO8859-1',
    'es_pe': 'es_PE.ISO8859-1',
    'es_py': 'es_PY.ISO8859-1',
    'es_sv': 'es_SV.ISO8859-1',
    'es_uy': 'es_UY.ISO8859-1',
    'es_ve': 'es_VE.ISO8859-1',
    'et': 'et_EE.ISO8859-4',
    'et_ee': 'et_EE.ISO8859-4',
    'fi': 'fi_FI.ISO8859-1',
    'fi_fi': 'fi_FI.ISO8859-1',
    'finnish': 'fi_FI.ISO8859-1',
    'fr': 'fr_FR.ISO8859-1',
    'fr_be': 'fr_BE.ISO8859-1',
    'fr_ca': 'fr_CA.ISO8859-1',
    'fr_ch': 'fr_CH.ISO8859-1',
    'fr_fr': 'fr_FR.ISO8859-1',
    'fre_fr': 'fr_FR.ISO8859-1',
    'french': 'fr_FR.ISO8859-1',
    'french_france': 'fr_FR.ISO8859-1',
    'ger_de': 'de_DE.ISO8859-1',
    'german': 'de_DE.ISO8859-1',
    'german_germany': 'de_DE.ISO8859-1',
    'greek': 'el_GR.ISO8859-7',
    'hebrew': 'iw_IL.ISO8859-8',
    'hr': 'hr_HR.ISO8859-2',
    'hr_hr': 'hr_HR.ISO8859-2',
    'hu': 'hu_HU.ISO8859-2',
    'hu_hu': 'hu_HU.ISO8859-2',
    'hungarian': 'hu_HU.ISO8859-2',
    'icelandic': 'is_IS.ISO8859-1',
    'id': 'id_ID.ISO8859-1',
    'id_id': 'id_ID.ISO8859-1',
    'is': 'is_IS.ISO8859-1',
    'is_is': 'is_IS.ISO8859-1',
    'iso-8859-1': 'en_US.ISO8859-1',
    'iso-8859-15': 'en_US.ISO8859-15',
    'iso8859-1': 'en_US.ISO8859-1',
    'iso8859-15': 'en_US.ISO8859-15',
    'iso_8859_1': 'en_US.ISO8859-1',
    'iso_8859_15': 'en_US.ISO8859-15',
    'it': 'it_IT.ISO8859-1',
    'it_ch': 'it_CH.ISO8859-1',
    'it_it': 'it_IT.ISO8859-1',
    'italian': 'it_IT.ISO8859-1',
    'iw': 'iw_IL.ISO8859-8',
    'iw_il': 'iw_IL.ISO8859-8',
    'ja': 'ja_JP.eucJP',
    'ja.jis': 'ja_JP.JIS7',
    'ja.sjis': 'ja_JP.SJIS',
    'ja_jp': 'ja_JP.eucJP',
    'ja_jp.ajec': 'ja_JP.eucJP',
    'ja_jp.euc': 'ja_JP.eucJP',
    'ja_jp.eucjp': 'ja_JP.eucJP',
    'ja_jp.iso-2022-jp': 'ja_JP.JIS7',
    'ja_jp.jis': 'ja_JP.JIS7',
    'ja_jp.jis7': 'ja_JP.JIS7',
    'ja_jp.mscode': 'ja_JP.SJIS',
    'ja_jp.sjis': 'ja_JP.SJIS',
    'ja_jp.ujis': 'ja_JP.eucJP',
    'japan': 'ja_JP.eucJP',
    'japanese': 'ja_JP.SJIS',
    'japanese-euc': 'ja_JP.eucJP',
    'japanese.euc': 'ja_JP.eucJP',
    'jp_jp': 'ja_JP.eucJP',
    'ko': 'ko_KR.eucKR',
    'ko_kr': 'ko_KR.eucKR',
    'ko_kr.euc': 'ko_KR.eucKR',
    'korean': 'ko_KR.eucKR',
    'lt': 'lt_LT.ISO8859-4',
    'lv': 'lv_LV.ISO8859-4',
    'mk': 'mk_MK.ISO8859-5',
    'mk_mk': 'mk_MK.ISO8859-5',
    'nl': 'nl_NL.ISO8859-1',
    'nl_be': 'nl_BE.ISO8859-1',
    'nl_nl': 'nl_NL.ISO8859-1',
    'no': 'no_NO.ISO8859-1',
    'no_no': 'no_NO.ISO8859-1',
    'norwegian': 'no_NO.ISO8859-1',
    'pl': 'pl_PL.ISO8859-2',
    'pl_pl': 'pl_PL.ISO8859-2',
    'polish': 'pl_PL.ISO8859-2',
    'portuguese': 'pt_PT.ISO8859-1',
    'portuguese_brazil': 'pt_BR.ISO8859-1',
    'posix': 'C',
    'posix-utf2': 'C',
    'pt': 'pt_PT.ISO8859-1',
    'pt_br': 'pt_BR.ISO8859-1',
    'pt_pt': 'pt_PT.ISO8859-1',
    'ro': 'ro_RO.ISO8859-2',
    'ro_ro': 'ro_RO.ISO8859-2',
    'ru': 'ru_RU.ISO8859-5',
    'ru_ru': 'ru_RU.ISO8859-5',
    'rumanian': 'ro_RO.ISO8859-2',
    'russian': 'ru_RU.ISO8859-5',
    'serbocroatian': 'sh_YU.ISO8859-2',
    'sh': 'sh_YU.ISO8859-2',
    'sh_hr': 'sh_HR.ISO8859-2',
    'sh_sp': 'sh_YU.ISO8859-2',
    'sh_yu': 'sh_YU.ISO8859-2',
    'sk': 'sk_SK.ISO8859-2',
    'sk_sk': 'sk_SK.ISO8859-2',
    'sl': 'sl_CS.ISO8859-2',
    'sl_cs': 'sl_CS.ISO8859-2',
    'sl_si': 'sl_SI.ISO8859-2',
    'slovak': 'sk_SK.ISO8859-2',
    'slovene': 'sl_CS.ISO8859-2',
    'sp': 'sp_YU.ISO8859-5',
    'sp_yu': 'sp_YU.ISO8859-5',
    'spanish': 'es_ES.ISO8859-1',
    'spanish_spain': 'es_ES.ISO8859-1',
    'sr_sp': 'sr_SP.ISO8859-2',
    'sv': 'sv_SE.ISO8859-1',
    'sv_se': 'sv_SE.ISO8859-1',
    'swedish': 'sv_SE.ISO8859-1',
    'th_th': 'th_TH.TACTIS',
    'tr': 'tr_TR.ISO8859-9',
    'tr_tr': 'tr_TR.ISO8859-9',
    'turkish': 'tr_TR.ISO8859-9',
    'univ': 'en_US.utf',
    'universal': 'en_US.utf',
    'zh': 'zh_CN.eucCN',
    'zh_cn': 'zh_CN.eucCN',
    'zh_cn.big5': 'zh_TW.eucTW',
    'zh_cn.euc': 'zh_CN.eucCN',
    'zh_tw': 'zh_TW.eucTW',
    'zh_tw.euc': 'zh_TW.eucTW',
}

#
# This maps windows language identifiers (as used on Windows 95 and
# earlier) to locale strings.
#
# NOTE: this mapping is incomplete.  If your language is missing, send
# a note with the missing language identifier and the suggested locale
# code to Fredrik Lundh <effbot@telia.com>.  Thanks /F

# Keys are numeric Windows language identifiers; values are
# language_TERRITORY locale codes without an encoding.
windows_locale = {
    0x0404: "zh_TW", # Chinese (Taiwan)
    0x0804: "zh_CN", # Chinese (PRC)
    0x0406: "da_DK", # Danish
    0x0413: "nl_NL", # Dutch (Netherlands)
    0x0409: "en_US", # English (United States)
    # GB, not UK, is the territory code (cf. 'en_uk' in locale_alias)
    0x0809: "en_GB", # English (United Kingdom)
    0x0c09: "en_AU", # English (Australian)
    0x1009: "en_CA", # English (Canadian)
    0x1409: "en_NZ", # English (New Zealand)
    0x1809: "en_IE", # English (Ireland)
    0x1c09: "en_ZA", # English (South Africa)
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French (Standard)
    0x080c: "fr_BE", # French (Belgian)
    0x0c0c: "fr_CA", # French (Canadian)
    0x100c: "fr_CH", # French (Switzerland)
    0x0407: "de_DE", # German (Standard)
    0x0408: "el_GR", # Greek
    0x040d: "iw_IL", # Hebrew
    0x040f: "is_IS", # Icelandic
    0x0410: "it_IT", # Italian (Standard)
    # JP, not JA, is the territory code (cf. 'ja_jp' in locale_alias)
    0x0411: "ja_JP", # Japanese
    0x0414: "no_NO", # Norwegian (Bokmal)
    0x0816: "pt_PT", # Portuguese (Standard)
    0x0c0a: "es_ES", # Spanish (Modern Sort)
    0x0441: "sw_KE", # Swahili (Kenya)
    0x041d: "sv_SE", # Swedish
    0x081d: "sv_FI", # Swedish (Finland)
    0x041f: "tr_TR", # Turkish
}

def _print_category_settings(categories):

    """ Prints the language and encoding currently set for each
        (name, category) entry of the categories dictionary.
    """
    for name, category in categories.items():
        print('%s ...' % name)
        lang, enc = getlocale(category)
        print(' Language:  %s' % (lang or '(undefined)'))
        print(' Encoding:  %s' % (enc or '(undefined)'))
        print('')

def _print_locale():

    """ Test function.

        Prints the default locale, then the per-category settings at
        startup, after resetlocale() and after setlocale(LC_ALL, "").
    """
    # collect all LC_* constants of this module except LC_ALL
    categories = {}
    for k, v in globals().items():
        if k[:3] == 'LC_':
            categories[k] = v
    del categories['LC_ALL']

    print('Locale defaults as determined by getdefaultlocale():')
    print('-'*72)
    lang, enc = getdefaultlocale()
    print('Language:  %s' % (lang or '(undefined)'))
    print('Encoding:  %s' % (enc or '(undefined)'))
    print('')

    print('Locale settings on startup:')
    print('-'*72)
    _print_category_settings(categories)

    print('')
    print('Locale settings after calling resetlocale():')
    print('-'*72)
    resetlocale()
    _print_category_settings(categories)

    try:
        setlocale(LC_ALL, "")
    except Error:
        # only a rejected locale is expected here; anything else
        # should surface instead of being swallowed
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print('')
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-'*72)
        _print_category_settings(categories)

###

# When run as a script, print the locale diagnostics followed by the
# number formatting self-test.
if __name__ == '__main__':
    print('Locale aliasing:')
    print('')
    _print_locale()
    print('')
    print('Number formatting:')
    print('')
    _test()