""" Locale support.

    The module provides low-level access to the C lib's locale APIs
    and adds high level number formatting APIs as well as a locale
    aliasing engine to complement these.

    The aliasing engine includes support for many commonly used locale
    names and maps them to values suitable for passing to the C lib's
    setlocale() function. It also includes default encodings for all
    supported locale names.

"""
13
Eric S. Raymondbe9b5072001-02-09 10:48:30 +000014import sys
Marc-André Lemburg5431bc32000-06-07 09:11:40 +000015
# Try importing the _locale module.
#
# If this fails, fall back on a basic 'C' locale emulation.
Guido van Rossumeef1d4e1997-11-19 19:01:43 +000019
# LC_MESSAGES is non-standard, so whether it exists is only known once
# the _locale import below has been attempted.  For that reason
# __all__ is extended once more at the very end of this file.
__all__ = [
    # functions and the exception type
    "setlocale", "Error", "localeconv", "strcoll", "strxfrm",
    "format", "str", "atof", "atoi",
    # locale categories and the CHAR_MAX sentinel
    "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", "LC_NUMERIC",
    "LC_ALL", "CHAR_MAX",
]
Skip Montanaro17ab1232001-01-24 06:27:27 +000025
try:

    from _locale import *

except ImportError:

    # Locale emulation: the _locale extension module is not available,
    # so provide a minimal stand-in that only knows the 'C' locale.

    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.
        """
        # 'C' locale default values
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation always reports 'C'.  None (query), ''
            (the user's default locale) and 'C' are accepted; any
            other value raises Error.
        """
        # '' must be accepted: this module itself calls
        # setlocale(LC_ALL, "") and the only locale the emulation can
        # offer as "default" is 'C'.
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'

    def strcoll(a, b):
        """ strcoll(string,string) -> int.
            Compares two strings according to the locale.

            Returns -1, 0 or 1 using plain string comparison.
        """
        # same result as cmp(a, b), spelled without the cmp() builtin
        return (a > b) - (a < b)

    def strxfrm(s):
        """ strxfrm(string) -> string.
            Returns a string that behaves for cmp locale-aware.

            In the emulated 'C' locale the string is returned as is.
        """
        return s
Marc-André Lemburg5431bc32000-06-07 09:11:40 +000087
### Number formatting APIs

# Author: Martin von Loewis
Guido van Rossumeef1d4e1997-11-19 19:01:43 +000091
# perform the grouping from right to left
def _group(s):
    """ Inserts the locale's thousands separator into the string s.

        The group sizes come from localeconv()['grouping'] and are
        applied from the right end of s.  A size of CHAR_MAX stops
        grouping; a size of 0 repeats the previous size for the rest
        of the string.  If the grouping list is empty, or starts with
        0 (so there is no previous size to repeat), s is returned
        unchanged.
    """
    conv = localeconv()
    grouping = conv['grouping']
    if not grouping:
        return s
    sep = conv['thousands_sep']
    result = ""
    group = 0                   # size of the most recent group, 0 = none yet
    while s and grouping:
        # CHAR_MAX: no further grouping is to be performed
        if grouping[0] == CHAR_MAX:
            break
        # 0: re-use last group ad infinitum
        elif grouping[0] != 0:
            # process next group size
            group = grouping[0]
            grouping = grouping[1:]
        elif not group:
            # a leading 0 has no previous group size to repeat
            break
        if result:
            result = s[-group:] + sep + result
        else:
            result = s[-group:]
        s = s[:-group]
    if not result:
        return s
    if s:
        # whatever is left over forms the leading, ungrouped part
        result = s + sep + result
    return result
117
def format(f, val, grouping=0):
    """Formats a value in the same way that the % formatting would use,
    but takes the current locale into account.
    Grouping is applied if the third parameter is true.

    f is a % format string for the single value val.  The '.' in the
    result is replaced by the locale's decimal point.  When grouping
    is requested only the first run of digits in the integer part is
    passed to _group(); a sign, padding or exponent around it is kept
    as produced by the % operator.

    Raises Error if the formatted string holds more than one '.'.
    """
    # Format the signed value directly: formatting abs(val) and then
    # prepending '-' gives wrong results for '%+d' or padded widths.
    result = f % val
    fields = result.split(".")
    if len(fields) > 2:
        raise Error("Too many decimal points in result string")
    if grouping:
        integer = fields[0]
        digits = "0123456789"
        # locate the first run of digits in the integer part
        start = 0
        while start < len(integer) and integer[start] not in digits:
            start = start + 1
        end = start
        while end < len(integer) and integer[end] in digits:
            end = end + 1
        fields[0] = integer[:start] + _group(integer[start:end]) + integer[end:]
    if len(fields) == 2:
        return fields[0] + localeconv()['decimal_point'] + fields[1]
    return fields[0]
137
def str(val):
    """Convert float to string, taking the locale into account.

    The value is formatted with '%.12g' through format(), so the
    locale's decimal point is used and no grouping is applied.
    """
    return format("%.12g", val)
141
def atof(str, func=float):
    """Parses a string as a float according to the locale settings.

    Every occurrence of the locale's thousands separator is removed
    and the locale's decimal point is replaced by '.'; the resulting
    text is handed to func (float by default) and its result returned.
    """
    text = str
    # drop the grouping characters first ...
    thousands = localeconv()['thousands_sep']
    if thousands:
        text = text.replace(thousands, "")
    # ... then turn the locale's decimal point into a plain dot
    decimal = localeconv()['decimal_point']
    if decimal:
        text = text.replace(decimal, '.')
    # what is left is in the form func understands
    return func(text)
156
def atoi(str):
    """Converts a string to an integer according to the locale settings.

    Delegates to atof() with int as the conversion function.
    """
    return atof(str, func=int)
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000160
def _test():
    """ Small demonstration of the number formatting APIs.

        Switches to the user's default locale with
        setlocale(LC_ALL, ""), then prints a grouped integer and a
        float next to the values atoi() / atof() parse back from them.
    """
    setlocale(LC_ALL, "")
    # do grouping
    s1 = format("%d", 123456789, 1)
    # single-argument print(...) gives the same output as the
    # multi-operand print statement and parses on Python 2 and 3
    print("%s is %s" % (s1, atoi(s1)))
    # standard formatting
    s1 = str(3.14)
    print("%s is %s" % (s1, atof(s1)))
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000169
### Locale name aliasing engine

# Author: Marc-Andre Lemburg, mal@lemburg.com
# Various tweaks by Fredrik Lundh <effbot@telia.com>
174
# Keep a reference to the low-level setlocale() under a private name:
# the public name is rebound further down to a wrapper that also
# accepts locale tuples.
_setlocale = setlocale
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000178
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The name is lowercased and a ':' used as encoding delimiter
        is treated like '.'.  It is then looked up in locale_alias,
        first together with its encoding and then by language name
        alone; in the second case a given encoding is mapped through
        encoding_alias and a missing one is replaced by the default
        encoding of the alias entry.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

    """
    # Split the lowercased name into language part and encoding;
    # only the first two '.'-separated components are considered.
    pieces = localename.lower().replace(':', '.').split('.')
    langname = pieces[0]
    if len(pieces) > 1:
        encoding = pieces[1]
        lookup_key = langname + '.' + encoding
    else:
        encoding = ''
        lookup_key = langname

    # First lookup: the full name (possibly with encoding)
    exact = locale_alias.get(lookup_key, None)
    if exact is not None:
        return exact

    # Second lookup: the language name (without encoding)
    base = locale_alias.get(langname, None)
    if base is None:
        # unknown name: hand it back untouched
        return localename

    if '.' in base:
        langname, default_encoding = base.split('.')
    else:
        langname, default_encoding = base, ''

    # an explicit encoding wins over the alias entry's default
    if encoding:
        encoding = encoding_alias.get(encoding, encoding)
    else:
        encoding = default_encoding

    if not encoding:
        return langname
    return langname + '.' + encoding
231
def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        The language code corresponds to RFC 1766. code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)
    if '.' in code:
        # always hand back a real tuple, as documented
        return tuple(code.split('.')[:2])
    elif code == 'C':
        return None, None
    raise ValueError('unknown locale: %s' % localename)
254
def _build_localename(localetuple):

    """ Builds a locale code from the given tuple (language code,
        encoding).

        A language code of None stands for 'C'; an encoding of None
        means that no '.encoding' suffix is appended.

        No aliasing or normalizing takes place.

    """
    language, encoding = localetuple
    name = language
    if name is None:
        name = 'C'
    if encoding is not None:
        name = name + '.' + encoding
    return name
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000270
def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first found to be defined and
        non-empty will be used; 'C' is assumed if there is none.
        envvars defaults to the search path used in GNU gettext; it
        must always contain the variable name 'LANG'.  The value of
        'LANGUAGE' is a colon separated list of which only the first
        entry is used.

        Except for the code 'C', the language code corresponds to RFC
        1766. code and encoding can be None in case the values cannot
        be determined.

        A ValueError is raised if the name found cannot be parsed.

    """

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        if sys.platform == "win32" and code and code[:2] == "0x":
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    lookup = os.environ.get
    for variable in envvars:
        value = lookup(variable, None)
        if value and variable == 'LANGUAGE':
            # LANGUAGE holds a priority list such as "de_DE:en";
            # only its first entry names the default locale
            value = value.split(':')[0]
        if value:
            # an empty value counts as unset
            localename = value
            break
    else:
        localename = 'C'
    return _parse_localename(localename)
320
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000321
def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* value except LC_ALL. It
        defaults to LC_CTYPE.  For LC_ALL a TypeError is raised when
        the current setting contains a ';'.

        Except for the code 'C', the language code corresponds to RFC
        1766. code and encoding can be None in case the values cannot
        be determined.

    """
    # querying: the low-level call is made without a locale argument
    current = _setlocale(category)
    is_composite = ';' in current
    if is_composite and category == LC_ALL:
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(current)
339
def setlocale(category, locale=None):

    """ Set the locale for the given category. The locale can be
        a string, a locale tuple (language code, encoding), or None.

        Locale tuples are converted to strings using the locale
        aliasing engine. Locale strings are passed directly to the
        C lib.

        category may be given as one of the LC_* values.

        Returns whatever the low-level setlocale() returns.

    """
    # Anything true that is not a plain string is taken to be a
    # locale tuple.  (type("") is used because this module defines
    # its own str().)
    needs_conversion = locale and type(locale) is not type("")
    if needs_conversion:
        # convert to string
        locale = normalize(_build_localename(locale))
    return _setlocale(category, locale)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000355
def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        The default setting is determined by calling
        getdefaultlocale(). category defaults to LC_ALL.

        The locale name built from that default is handed to the
        low-level setlocale() as is; nothing is returned.

    """
    default_tuple = getdefaultlocale()
    default_name = _build_localename(default_tuple)
    _setlocale(category, default_name)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000365
### Database
#
# The following data was extracted from the locale.alias file which
# comes with X11 and then hand edited removing the explicit encoding
# definitions and adding some more aliases. The file is usually
# available as /usr/lib/X11/locale/locale.alias.
#
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000373
#
# The encoding_alias table maps lowercase encoding alias names to C
# locale encoding names (case-sensitive).  Entries are grouped by the
# encoding name they map to.
#
encoding_alias = {
    # 'C'
    '437':          'C',
    'c':            'C',

    # ISO 8859 family
    'iso8859':      'ISO8859-1',
    '8859':         'ISO8859-1',
    '88591':        'ISO8859-1',
    'ascii':        'ISO8859-1',
    'en':           'ISO8859-1',
    'iso88591':     'ISO8859-1',
    'iso_8859-1':   'ISO8859-1',
    '885915':       'ISO8859-15',
    'iso885915':    'ISO8859-15',
    'iso_8859-15':  'ISO8859-15',
    'iso8859-2':    'ISO8859-2',
    'iso88592':     'ISO8859-2',
    'iso_8859-2':   'ISO8859-2',
    'iso88595':     'ISO8859-5',
    'iso88596':     'ISO8859-6',
    'iso88597':     'ISO8859-7',
    'iso88598':     'ISO8859-8',
    'iso88599':     'ISO8859-9',

    # JIS7 / SJIS / eucJP
    'iso-2022-jp':  'JIS7',
    'jis':          'JIS7',
    'jis7':         'JIS7',
    'sjis':         'SJIS',
    'ajec':         'eucJP',
    'eucjp':        'eucJP',
    'ujis':         'eucJP',

    # TACTIS
    'tis620':       'TACTIS',

    # utf
    'utf-8':        'utf',
    'utf8':         'utf',
    'utf8@ucs4':    'utf',
}
411
#
# The locale_alias table maps lowercase alias names to C locale names
# (case-sensitive). Encodings are always separated from the locale
# name using a dot ('.'); they should only be given in case the
# language name is needed to interpret the given encoding alias
# correctly (CJK codes often have this need).
#
locale_alias = {
    'american':                      'en_US.ISO8859-1',
    'ar':                            'ar_AA.ISO8859-6',
    'ar_aa':                         'ar_AA.ISO8859-6',
    'ar_sa':                         'ar_SA.ISO8859-6',
    'arabic':                        'ar_AA.ISO8859-6',
    'bg':                            'bg_BG.ISO8859-5',
    'bg_bg':                         'bg_BG.ISO8859-5',
    'bulgarian':                     'bg_BG.ISO8859-5',
    'c-french':                      'fr_CA.ISO8859-1',
    'c':                             'C',
    'c_c':                           'C',
    'cextend':                       'en_US.ISO8859-1',
    'chinese-s':                     'zh_CN.eucCN',
    'chinese-t':                     'zh_TW.eucTW',
    'croatian':                      'hr_HR.ISO8859-2',
    'cs':                            'cs_CZ.ISO8859-2',
    'cs_cs':                         'cs_CZ.ISO8859-2',
    'cs_cz':                         'cs_CZ.ISO8859-2',
    'cz':                            'cz_CZ.ISO8859-2',
    'cz_cz':                         'cz_CZ.ISO8859-2',
    # same target as 'cs', 'cs_cs' and 'cs_cz' above
    'czech':                         'cs_CZ.ISO8859-2',
    'da':                            'da_DK.ISO8859-1',
    'da_dk':                         'da_DK.ISO8859-1',
    'danish':                        'da_DK.ISO8859-1',
    'de':                            'de_DE.ISO8859-1',
    'de_at':                         'de_AT.ISO8859-1',
    'de_ch':                         'de_CH.ISO8859-1',
    'de_de':                         'de_DE.ISO8859-1',
    'dutch':                         'nl_BE.ISO8859-1',
    'ee':                            'ee_EE.ISO8859-4',
    'el':                            'el_GR.ISO8859-7',
    'el_gr':                         'el_GR.ISO8859-7',
    'en':                            'en_US.ISO8859-1',
    'en_au':                         'en_AU.ISO8859-1',
    'en_ca':                         'en_CA.ISO8859-1',
    'en_gb':                         'en_GB.ISO8859-1',
    'en_ie':                         'en_IE.ISO8859-1',
    'en_nz':                         'en_NZ.ISO8859-1',
    'en_uk':                         'en_GB.ISO8859-1',
    'en_us':                         'en_US.ISO8859-1',
    'eng_gb':                        'en_GB.ISO8859-1',
    'english':                       'en_EN.ISO8859-1',
    'english_uk':                    'en_GB.ISO8859-1',
    'english_united-states':         'en_US.ISO8859-1',
    'english_us':                    'en_US.ISO8859-1',
    'es':                            'es_ES.ISO8859-1',
    'es_ar':                         'es_AR.ISO8859-1',
    'es_bo':                         'es_BO.ISO8859-1',
    'es_cl':                         'es_CL.ISO8859-1',
    'es_co':                         'es_CO.ISO8859-1',
    'es_cr':                         'es_CR.ISO8859-1',
    'es_ec':                         'es_EC.ISO8859-1',
    'es_es':                         'es_ES.ISO8859-1',
    'es_gt':                         'es_GT.ISO8859-1',
    'es_mx':                         'es_MX.ISO8859-1',
    'es_ni':                         'es_NI.ISO8859-1',
    'es_pa':                         'es_PA.ISO8859-1',
    'es_pe':                         'es_PE.ISO8859-1',
    'es_py':                         'es_PY.ISO8859-1',
    'es_sv':                         'es_SV.ISO8859-1',
    'es_uy':                         'es_UY.ISO8859-1',
    'es_ve':                         'es_VE.ISO8859-1',
    'et':                            'et_EE.ISO8859-4',
    'et_ee':                         'et_EE.ISO8859-4',
    'fi':                            'fi_FI.ISO8859-1',
    'fi_fi':                         'fi_FI.ISO8859-1',
    'finnish':                       'fi_FI.ISO8859-1',
    'fr':                            'fr_FR.ISO8859-1',
    'fr_be':                         'fr_BE.ISO8859-1',
    'fr_ca':                         'fr_CA.ISO8859-1',
    'fr_ch':                         'fr_CH.ISO8859-1',
    'fr_fr':                         'fr_FR.ISO8859-1',
    'fre_fr':                        'fr_FR.ISO8859-1',
    'french':                        'fr_FR.ISO8859-1',
    'french_france':                 'fr_FR.ISO8859-1',
    'ger_de':                        'de_DE.ISO8859-1',
    'german':                        'de_DE.ISO8859-1',
    'german_germany':                'de_DE.ISO8859-1',
    'greek':                         'el_GR.ISO8859-7',
    'hebrew':                        'iw_IL.ISO8859-8',
    'hr':                            'hr_HR.ISO8859-2',
    'hr_hr':                         'hr_HR.ISO8859-2',
    'hu':                            'hu_HU.ISO8859-2',
    'hu_hu':                         'hu_HU.ISO8859-2',
    'hungarian':                     'hu_HU.ISO8859-2',
    'icelandic':                     'is_IS.ISO8859-1',
    'id':                            'id_ID.ISO8859-1',
    'id_id':                         'id_ID.ISO8859-1',
    'is':                            'is_IS.ISO8859-1',
    'is_is':                         'is_IS.ISO8859-1',
    'iso-8859-1':                    'en_US.ISO8859-1',
    'iso-8859-15':                   'en_US.ISO8859-15',
    'iso8859-1':                     'en_US.ISO8859-1',
    'iso8859-15':                    'en_US.ISO8859-15',
    'iso_8859_1':                    'en_US.ISO8859-1',
    'iso_8859_15':                   'en_US.ISO8859-15',
    'it':                            'it_IT.ISO8859-1',
    'it_ch':                         'it_CH.ISO8859-1',
    'it_it':                         'it_IT.ISO8859-1',
    'italian':                       'it_IT.ISO8859-1',
    'iw':                            'iw_IL.ISO8859-8',
    'iw_il':                         'iw_IL.ISO8859-8',
    'ja':                            'ja_JP.eucJP',
    'ja.jis':                        'ja_JP.JIS7',
    'ja.sjis':                       'ja_JP.SJIS',
    'ja_jp':                         'ja_JP.eucJP',
    'ja_jp.ajec':                    'ja_JP.eucJP',
    'ja_jp.euc':                     'ja_JP.eucJP',
    'ja_jp.eucjp':                   'ja_JP.eucJP',
    'ja_jp.iso-2022-jp':             'ja_JP.JIS7',
    'ja_jp.jis':                     'ja_JP.JIS7',
    'ja_jp.jis7':                    'ja_JP.JIS7',
    'ja_jp.mscode':                  'ja_JP.SJIS',
    'ja_jp.sjis':                    'ja_JP.SJIS',
    'ja_jp.ujis':                    'ja_JP.eucJP',
    'japan':                         'ja_JP.eucJP',
    'japanese':                      'ja_JP.SJIS',
    'japanese-euc':                  'ja_JP.eucJP',
    'japanese.euc':                  'ja_JP.eucJP',
    'jp_jp':                         'ja_JP.eucJP',
    'ko':                            'ko_KR.eucKR',
    'ko_kr':                         'ko_KR.eucKR',
    'ko_kr.euc':                     'ko_KR.eucKR',
    'korean':                        'ko_KR.eucKR',
    'lt':                            'lt_LT.ISO8859-4',
    'lv':                            'lv_LV.ISO8859-4',
    'mk':                            'mk_MK.ISO8859-5',
    'mk_mk':                         'mk_MK.ISO8859-5',
    'nl':                            'nl_NL.ISO8859-1',
    'nl_be':                         'nl_BE.ISO8859-1',
    'nl_nl':                         'nl_NL.ISO8859-1',
    'no':                            'no_NO.ISO8859-1',
    'no_no':                         'no_NO.ISO8859-1',
    'norwegian':                     'no_NO.ISO8859-1',
    'pl':                            'pl_PL.ISO8859-2',
    'pl_pl':                         'pl_PL.ISO8859-2',
    'polish':                        'pl_PL.ISO8859-2',
    'portuguese':                    'pt_PT.ISO8859-1',
    'portuguese_brazil':             'pt_BR.ISO8859-1',
    'posix':                         'C',
    'posix-utf2':                    'C',
    'pt':                            'pt_PT.ISO8859-1',
    'pt_br':                         'pt_BR.ISO8859-1',
    'pt_pt':                         'pt_PT.ISO8859-1',
    'ro':                            'ro_RO.ISO8859-2',
    'ro_ro':                         'ro_RO.ISO8859-2',
    'ru':                            'ru_RU.ISO8859-5',
    'ru_ru':                         'ru_RU.ISO8859-5',
    'rumanian':                      'ro_RO.ISO8859-2',
    'russian':                       'ru_RU.ISO8859-5',
    'serbocroatian':                 'sh_YU.ISO8859-2',
    'sh':                            'sh_YU.ISO8859-2',
    'sh_hr':                         'sh_HR.ISO8859-2',
    'sh_sp':                         'sh_YU.ISO8859-2',
    'sh_yu':                         'sh_YU.ISO8859-2',
    'sk':                            'sk_SK.ISO8859-2',
    'sk_sk':                         'sk_SK.ISO8859-2',
    'sl':                            'sl_CS.ISO8859-2',
    'sl_cs':                         'sl_CS.ISO8859-2',
    'sl_si':                         'sl_SI.ISO8859-2',
    'slovak':                        'sk_SK.ISO8859-2',
    'slovene':                       'sl_CS.ISO8859-2',
    'sp':                            'sp_YU.ISO8859-5',
    'sp_yu':                         'sp_YU.ISO8859-5',
    'spanish':                       'es_ES.ISO8859-1',
    'spanish_spain':                 'es_ES.ISO8859-1',
    'sr_sp':                         'sr_SP.ISO8859-2',
    'sv':                            'sv_SE.ISO8859-1',
    'sv_se':                         'sv_SE.ISO8859-1',
    'swedish':                       'sv_SE.ISO8859-1',
    'th_th':                         'th_TH.TACTIS',
    'tr':                            'tr_TR.ISO8859-9',
    'tr_tr':                         'tr_TR.ISO8859-9',
    'turkish':                       'tr_TR.ISO8859-9',
    'univ':                          'en_US.utf',
    'universal':                     'en_US.utf',
    'zh':                            'zh_CN.eucCN',
    'zh_cn':                         'zh_CN.eucCN',
    'zh_cn.big5':                    'zh_TW.eucTW',
    'zh_cn.euc':                     'zh_CN.eucCN',
    'zh_tw':                         'zh_TW.eucTW',
    'zh_tw.euc':                     'zh_TW.eucTW',
}
603
#
# this maps windows language identifiers (as used on Windows 95 and
# earlier) to locale strings.
#
# NOTE: this mapping is incomplete.  If your language is missing, send
# a note with the missing language identifier and the suggested locale
# code to Fredrik Lundh <effbot@telia.com>.  Thanks /F

windows_locale = {
    0x0404: "zh_TW", # Chinese (Taiwan)
    0x0804: "zh_CN", # Chinese (PRC)
    0x0406: "da_DK", # Danish
    0x0413: "nl_NL", # Dutch (Netherlands)
    0x0409: "en_US", # English (United States)
    0x0809: "en_GB", # English (United Kingdom); the country code is GB
    0x0c09: "en_AU", # English (Australian)
    0x1009: "en_CA", # English (Canadian)
    0x1409: "en_NZ", # English (New Zealand)
    0x1809: "en_IE", # English (Ireland)
    0x1c09: "en_ZA", # English (South Africa)
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French (Standard)
    0x080c: "fr_BE", # French (Belgian)
    0x0c0c: "fr_CA", # French (Canadian)
    0x100c: "fr_CH", # French (Switzerland)
    0x0407: "de_DE", # German (Standard)
    0x0408: "el_GR", # Greek
    0x040d: "iw_IL", # Hebrew
    0x040f: "is_IS", # Icelandic
    0x0410: "it_IT", # Italian (Standard)
    0x0411: "ja_JP", # Japanese; the country code is JP
    0x0414: "no_NO", # Norwegian (Bokmal)
    0x0816: "pt_PT", # Portuguese (Standard)
    0x0c0a: "es_ES", # Spanish (Modern Sort)
    0x0441: "sw_KE", # Swahili (Kenya)
    0x041d: "sv_SE", # Swedish
    0x081d: "sv_FI", # Swedish (Finland)
    0x041f: "tr_TR", # Turkish
}
643
def _print_locale():

    """ Test function.

        Prints the defaults reported by getdefaultlocale(), then the
        getlocale() result for every LC_* category of this module
        except LC_ALL: once at startup, once after resetlocale() and,
        if that call succeeds, once after setlocale(LC_ALL, "").

        Note that the calls made here change the process locale.
    """
    # collect all LC_* constants defined in this module
    categories = {}
    for key, value in globals().items():
        if key[:3] == 'LC_':
            categories[key] = value
    del categories['LC_ALL']

    def _report(categories):
        # print language and encoding for each collected category
        for name, category in categories.items():
            print('%s ...' % name)
            lang, enc = getlocale(category)
            print(' Language:  %s' % (lang or '(undefined)'))
            print(' Encoding:  %s' % (enc or '(undefined)'))
            print('')

    print('Locale defaults as determined by getdefaultlocale():')
    print('-' * 72)
    lang, enc = getdefaultlocale()
    print('Language:  %s' % (lang or '(undefined)'))
    print('Encoding:  %s' % (enc or '(undefined)'))
    print('')

    print('Locale settings on startup:')
    print('-' * 72)
    _report(categories)

    print('')
    print('Locale settings after calling resetlocale():')
    print('-' * 72)
    resetlocale()
    _report(categories)

    try:
        setlocale(LC_ALL, "")
    except Error:
        # only the failure signalled by setlocale() itself is
        # expected here; anything else should propagate
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print('')
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-' * 72)
        _report(categories)
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000699
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000700###
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000701
# LC_MESSAGES is not available everywhere; it is added to __all__
# only if the name exists in this module.
try:
    LC_MESSAGES
except NameError:
    # the name is simply not defined on this platform
    pass
else:
    __all__.append("LC_MESSAGES")
708
# When run as a script: show the locale aliasing report followed by
# the number formatting demo.
if __name__ == '__main__':
    print('Locale aliasing:')
    print('')
    _print_locale()
    print('')
    print('Number formatting:')
    print('')
    _test()