blob: 6c6eeeafae8903affddd6a8385b9186d402703e0 [file] [log] [blame]
""" Locale support.

    The module provides low-level access to the C lib's locale APIs
    and adds high level number formatting APIs as well as a locale
    aliasing engine to complement these.

    The aliasing engine includes support for many commonly used locale
    names and maps them to values suitable for passing to the C lib's
    setlocale() function. It also includes default encodings for all
    supported locale names.

"""
13
Eric S. Raymondbe9b5072001-02-09 10:48:30 +000014import sys
Marc-André Lemburg5431bc32000-06-07 09:11:40 +000015
# Try importing the _locale module.
#
# If this fails, fall back on a basic 'C' locale emulation.
Guido van Rossumeef1d4e1997-11-19 19:01:43 +000019
# Public names exported by "from locale import *".
#
# Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
# trying the import. So __all__ is also fiddled at the end of the file,
# where "LC_MESSAGES" is appended if the name turned out to be defined.
__all__ = ["setlocale","Error","localeconv","strcoll","strxfrm",
           "format","str","atof","atoi","LC_CTYPE","LC_COLLATE",
           "LC_TIME","LC_MONETARY","LC_NUMERIC", "LC_ALL","CHAR_MAX"]
Skip Montanaro17ab1232001-01-24 06:27:27 +000025
try:

    # Preferred path: the C implementation supplies the constants,
    # Error, localeconv(), setlocale(), strcoll() and strxfrm().
    from _locale import *

except ImportError:

    # Locale emulation: a minimal pure-Python stand-in that only knows
    # the portable 'C' locale.

    Error = ValueError
    CHAR_MAX = 127

    # Category constants used by the emulation.
    LC_CTYPE = 0
    LC_NUMERIC = 1
    LC_TIME = 2
    LC_COLLATE = 3
    LC_MONETARY = 4
    LC_MESSAGES = 5
    LC_ALL = 6

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.
        """
        # 'C' locale default values: every string parameter except the
        # decimal point is empty and every numeric parameter is 127.
        conv = {'decimal_point': '.',
                'grouping': [127],
                'mon_grouping': []}
        for key in ('currency_symbol', 'int_curr_symbol',
                    'negative_sign', 'positive_sign',
                    'thousands_sep', 'mon_thousands_sep',
                    'mon_decimal_point'):
            conv[key] = ''
        for key in ('n_sign_posn', 'p_sign_posn',
                    'n_cs_precedes', 'p_cs_precedes',
                    'n_sep_by_space', 'p_sep_by_space',
                    'frac_digits', 'int_frac_digits'):
            conv[key] = 127
        return conv

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation accepts only None, '' and 'C' as value and
            always answers 'C'; anything else raises Error.
        """
        if value in (None, '', 'C'):
            return 'C'
        raise Error('_locale emulation only supports "C" locale')

    def strcoll(a,b):
        """ strcoll(string,string) -> int.
            Compares two strings according to the locale.

            In the emulated 'C' locale this is the plain cmp() ordering.
        """
        return cmp(a, b)

    def strxfrm(s):
        """ strxfrm(string) -> string.
            Returns a string that behaves for cmp locale-aware.

            In the emulated 'C' locale the string is returned unchanged.
        """
        return s
Marc-André Lemburg5431bc32000-06-07 09:11:40 +000087
### Number formatting APIs

# Author: Martin von Loewis
Guido van Rossumeef1d4e1997-11-19 19:01:43 +000091
92#perform the grouping from right to left
93def _group(s):
94 conv=localeconv()
95 grouping=conv['grouping']
Guido van Rossum67addfe2001-04-16 16:04:10 +000096 if not grouping:return (s, 0)
Guido van Rossumeef1d4e1997-11-19 19:01:43 +000097 result=""
Martin v. Löwis88ad12a2001-04-13 08:09:50 +000098 seps = 0
99 spaces = ""
100 if s[-1] == ' ':
101 sp = s.find(' ')
102 spaces = s[sp:]
103 s = s[:sp]
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000104 while s and grouping:
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000105 # if grouping is -1, we are done
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000106 if grouping[0]==CHAR_MAX:
107 break
108 # 0: re-use last group ad infinitum
109 elif grouping[0]!=0:
110 #process last group
111 group=grouping[0]
112 grouping=grouping[1:]
113 if result:
114 result=s[-group:]+conv['thousands_sep']+result
Martin v. Löwis88ad12a2001-04-13 08:09:50 +0000115 seps += 1
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000116 else:
117 result=s[-group:]
118 s=s[:-group]
Martin v. Löwis88ad12a2001-04-13 08:09:50 +0000119 if s and s[-1] not in "0123456789":
120 # the leading string is only spaces and signs
121 return s+result+spaces,seps
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000122 if not result:
Martin v. Löwis88ad12a2001-04-13 08:09:50 +0000123 return s+spaces,seps
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000124 if s:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000125 result=s+conv['thousands_sep']+result
Martin v. Löwis88ad12a2001-04-13 08:09:50 +0000126 seps += 1
127 return result+spaces,seps
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000128
def format(f,val,grouping=0):
    """Format val with the %-style format f, honouring the current locale.

    The result of f % val is split at the '.'; the integer part is
    grouped with the locale's thousands separator when grouping is true,
    and the '.' is replaced by the locale's decimal point.

    Raises Error if the formatted string contains more than one '.'.
    """
    fields = (f % val).split(".")
    seps = 0
    if grouping:
        fields[0], seps = _group(fields[0])
    if len(fields) > 2:
        raise Error("Too many decimal points in result string")
    if len(fields) == 2:
        result = fields[0] + localeconv()['decimal_point'] + fields[1]
    else:
        result = fields[0]

    # If the number was formatted for a specific width, it may have been
    # padded with spaces on the left or right.  Drop one padding space
    # per inserted separator so the overall width stays the same.
    # Leading zeroes as fillers are not yet dealt with, as it is not
    # clear how they should interact with grouping.
    for _ in range(seps):
        sp = result.find(" ")
        if sp == -1:
            break
        result = result[:sp] + result[sp+1:]

    return result
Martin v. Löwisdb786872001-01-21 18:52:33 +0000157
def str(val):
    """Convert a float to a string, taking the locale into account.

    The value is formatted with "%.12g" through format(), so the
    locale's decimal point is used; no grouping is applied.
    """
    return format("%.12g", val)
161
def atof(string,func=float):
    """Parse a string as a float according to the locale settings.

    Thousands separators are removed and the locale's decimal point is
    replaced by '.', then the cleaned-up text is handed to func (float
    by default) and its result is returned.
    """
    conv = localeconv()
    thousands_sep = conv['thousands_sep']
    decimal_point = conv['decimal_point']
    # First, get rid of the grouping
    if thousands_sep:
        string = string.replace(thousands_sep, '')
    # next, replace the decimal point with a dot
    if decimal_point:
        string = string.replace(decimal_point, '.')
    # finally, parse the string
    return func(string)
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000174
def atoi(str):
    """Convert a string to an integer according to the locale settings.

    Same clean-up as atof(), with int as the final conversion.  The
    parameter name shadows this module's str() inside the function.
    """
    converter = int
    return atof(str, converter)
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000178
def _test():
    """Self-test for the number formatting APIs.

    Switches LC_ALL to the user's default locale, then prints a grouped
    integer and a float next to the values parsed back from them.
    """
    setlocale(LC_ALL, "")
    #do grouping
    s1=format("%d", 123456789,1)
    print s1, "is", atoi(s1)
    #standard formatting
    s1=str(3.14)
    print s1, "is", atof(s1)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000187
### Locale name aliasing engine

# Author: Marc-Andre Lemburg, mal@lemburg.com
# Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000192
# Store away the low-level version of setlocale (either the one from
# _locale or the 'C' emulation); the public name setlocale is
# overridden below by a wrapper that also accepts locale tuples.
_setlocale = setlocale
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000196
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

    """
    # Lower-case the name; ':' is sometimes used as encoding delimiter,
    # so treat it like '.'.
    fullname = localename.lower().replace(':', '.')

    # Split into language and encoding; anything after a second
    # delimiter is dropped.
    parts = fullname.split('.')
    if len(parts) > 1:
        langname, encoding = parts[0], parts[1]
        fullname = langname + '.' + encoding
    else:
        langname, encoding = fullname, ''

    # First lookup: fullname (possibly with encoding)
    code = locale_alias.get(fullname)
    if code is not None:
        return code

    # Second try: langname (without encoding)
    code = locale_alias.get(langname)
    if code is None:
        # Unknown language: hand back the caller's name untouched.
        return localename

    if '.' in code:
        langname, defenc = code.split('.')
    else:
        langname, defenc = code, ''

    # An explicitly given encoding wins (after alias translation);
    # otherwise use the default encoding of the alias entry.
    if encoding:
        encoding = encoding_alias.get(encoding, encoding)
    else:
        encoding = defenc

    if not encoding:
        return langname
    return langname + '.' + encoding
249
250def _parse_localename(localename):
251
252 """ Parses the locale code for localename and returns the
253 result as tuple (language code, encoding).
254
255 The localename is normalized and passed through the locale
256 alias engine. A ValueError is raised in case the locale name
257 cannot be parsed.
258
259 The language code corresponds to RFC 1766. code and encoding
260 can be None in case the values cannot be determined or are
Jeremy Hyltona05e2932000-06-28 14:48:01 +0000261 unknown to this implementation.
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000262
263 """
264 code = normalize(localename)
Martin v. Löwisf0a46682002-11-03 17:20:12 +0000265 if '@' in localename:
266 # Deal with locale modifiers
267 code, modifier = code.split('@')
268 if modifier == 'euro' and '.' not in code:
269 # Assume Latin-9 for @euro locales. This is bogus,
270 # since some systems may use other encodings for these
271 # locales. Also, we ignore other modifiers.
272 return code, 'iso-8859-15'
Tim Peters230a60c2002-11-09 05:08:07 +0000273
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000274 if '.' in code:
Eric S. Raymondbe9b5072001-02-09 10:48:30 +0000275 return code.split('.')[:2]
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000276 elif code == 'C':
277 return None, None
Andrew M. Kuchling1f877ef2001-08-13 14:50:44 +0000278 raise ValueError, 'unknown locale: %s' % localename
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000279
280def _build_localename(localetuple):
281
282 """ Builds a locale code from the given tuple (language code,
283 encoding).
284
285 No aliasing or normalizing takes place.
286
287 """
288 language, encoding = localetuple
289 if language is None:
290 language = 'C'
291 if encoding is None:
292 return language
293 else:
294 return language + '.' + encoding
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000295
def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first found to be defined (and
        non-empty) will be used. envvars defaults to the search path
        used in GNU gettext; it must always contain the variable name
        'LANG'.

        The LANGUAGE variable may hold a colon separated list of
        languages in order of preference; only its first entry is used.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

        A ValueError is raised if the name found in the environment
        cannot be parsed.

    """

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        if sys.platform == "win32" and code and code[:2] == "0x":
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    lookup = os.environ.get
    for variable in envvars:
        localename = lookup(variable,None)
        if localename:
            if variable == 'LANGUAGE':
                # LANGUAGE is a priority list ("de_DE:de:en"); without
                # this, normalize() would turn the ':' into '.' and
                # mistake the second entry for an encoding.
                localename = localename.split(':')[0]
            break
    else:
        localename = 'C'
    return _parse_localename(localename)
345
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000346
def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* values except LC_ALL. It
        defaults to LC_CTYPE.  LC_ALL is rejected with a TypeError
        when the low-level setlocale() reports a composite setting
        (a string containing ';').

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """
    current = _setlocale(category)
    composite = ';' in current
    if category == LC_ALL and composite:
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(current)
364
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, a locale tuple (language code, encoding), or None.

        Locale tuples are converted to strings using the locale
        aliasing engine.  Locale strings are passed directly to the
        C lib.

        category may be given as one of the LC_* values.

        Returns whatever the low-level setlocale() returns.

    """
    # type("") rather than the name str: this module rebinds str to
    # its own locale-aware conversion function.
    is_string = type(locale) is type("")
    if locale and not is_string:
        # convert to string
        name = _build_localename(locale)
        locale = normalize(name)
    return _setlocale(category, locale)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000380
def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        The default setting is determined by calling
        getdefaultlocale(); the resulting tuple is turned into a
        locale string and handed to the low-level setlocale().
        category defaults to LC_ALL.

    """
    default_name = _build_localename(getdefaultlocale())
    _setlocale(category, default_name)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000390
# getpreferredencoding() is defined in one of three ways, chosen once at
# import time depending on the platform and on whether the C library
# exposes nl_langinfo(CODESET).
if sys.platform in ('win32', 'darwin', 'mac'):
    # On Win32, this will return the ANSI code page
    # On the Mac, it should return the system encoding;
    # it might return "ascii" instead
    def getpreferredencoding(do_setlocale = True):
        """Return the charset that the user is likely using.

        do_setlocale is accepted for interface compatibility and is
        not used by this variant."""
        import _locale
        return _locale._getdefaultlocale()[1]
else:
    # On Unix, if CODESET is available, use that.
    try:
        CODESET
    except NameError:
        # Fall back to parsing environment variables :-(
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            by looking at environment variables.

            do_setlocale is accepted for interface compatibility and
            is not used by this variant."""
            return getdefaultlocale()[1]
    else:
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            according to the system configuration.

            If do_setlocale is true, LC_CTYPE is temporarily switched
            to the user's default locale for the query and restored
            afterwards; otherwise the current LC_CTYPE setting is
            queried as is."""
            if not do_setlocale:
                return nl_langinfo(CODESET)
            oldloc = setlocale(LC_CTYPE)
            try:
                try:
                    setlocale(LC_CTYPE, "")
                except Error:
                    # The environment names a locale the C library
                    # does not support; answer for the current
                    # locale rather than failing the whole query.
                    pass
                return nl_langinfo(CODESET)
            finally:
                # Always put the caller's LC_CTYPE setting back.
                setlocale(LC_CTYPE, oldloc)
Tim Peters230a60c2002-11-09 05:08:07 +0000421
Martin v. Löwisf0a46682002-11-03 17:20:12 +0000422
### Database
#
# The following data was extracted from the locale.alias file which
# comes with X11 and then hand edited removing the explicit encoding
# definitions and adding some more aliases. The file is usually
# available as /usr/lib/X11/locale/locale.alias.
#
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000430
#
# The encoding_alias table maps lowercase encoding alias names to C
# locale encoding names (case-sensitive).
#
# normalize() consults it for the encoding part of a locale name when
# only the language part was found in locale_alias; encodings missing
# from this table are passed through unchanged.
#
encoding_alias = {
    '437': 'C',
    'c': 'C',
    'iso8859': 'ISO8859-1',
    '8859': 'ISO8859-1',
    '88591': 'ISO8859-1',
    'ascii': 'ISO8859-1',
    'en': 'ISO8859-1',
    'iso88591': 'ISO8859-1',
    'iso_8859-1': 'ISO8859-1',
    '885915': 'ISO8859-15',
    'iso885915': 'ISO8859-15',
    'iso_8859-15': 'ISO8859-15',
    'iso8859-2': 'ISO8859-2',
    'iso88592': 'ISO8859-2',
    'iso_8859-2': 'ISO8859-2',
    'iso88595': 'ISO8859-5',
    'iso88596': 'ISO8859-6',
    'iso88597': 'ISO8859-7',
    'iso88598': 'ISO8859-8',
    'iso88599': 'ISO8859-9',
    'iso-2022-jp': 'JIS7',
    'jis': 'JIS7',
    'jis7': 'JIS7',
    'sjis': 'SJIS',
    'tis620': 'TACTIS',
    'ajec': 'eucJP',
    'eucjp': 'eucJP',
    'ujis': 'eucJP',
    'utf-8': 'utf',
    'utf8': 'utf',
    'utf8@ucs4': 'utf',
}
468
#
# The locale_alias table maps lowercase alias names to C locale names
# (case-sensitive). Encodings are always separated from the locale
# name using a dot ('.'); they should only be given in case the
# language name is needed to interpret the given encoding alias
# correctly (CJK codes often have this need).
#
# normalize() looks names up here twice: first the full lowercased
# name (language plus encoding), then the language part alone.
#
locale_alias = {
    'american': 'en_US.ISO8859-1',
    'ar': 'ar_AA.ISO8859-6',
    'ar_aa': 'ar_AA.ISO8859-6',
    'ar_sa': 'ar_SA.ISO8859-6',
    'arabic': 'ar_AA.ISO8859-6',
    'bg': 'bg_BG.ISO8859-5',
    'bg_bg': 'bg_BG.ISO8859-5',
    'bulgarian': 'bg_BG.ISO8859-5',
    'c-french': 'fr_CA.ISO8859-1',
    'c': 'C',
    'c_c': 'C',
    'cextend': 'en_US.ISO8859-1',
    'chinese-s': 'zh_CN.eucCN',
    'chinese-t': 'zh_TW.eucTW',
    'croatian': 'hr_HR.ISO8859-2',
    'cs': 'cs_CZ.ISO8859-2',
    'cs_cs': 'cs_CZ.ISO8859-2',
    'cs_cz': 'cs_CZ.ISO8859-2',
    'cz': 'cz_CZ.ISO8859-2',
    'cz_cz': 'cz_CZ.ISO8859-2',
    # NOTE(review): 'czech' maps to cs_CS while 'cs', 'cs_cs' and
    # 'cs_cz' above map to cs_CZ -- confirm which one is intended.
    'czech': 'cs_CS.ISO8859-2',
    'da': 'da_DK.ISO8859-1',
    'da_dk': 'da_DK.ISO8859-1',
    'danish': 'da_DK.ISO8859-1',
    'de': 'de_DE.ISO8859-1',
    'de_at': 'de_AT.ISO8859-1',
    'de_ch': 'de_CH.ISO8859-1',
    'de_de': 'de_DE.ISO8859-1',
    'dutch': 'nl_BE.ISO8859-1',
    'ee': 'ee_EE.ISO8859-4',
    'el': 'el_GR.ISO8859-7',
    'el_gr': 'el_GR.ISO8859-7',
    'en': 'en_US.ISO8859-1',
    'en_au': 'en_AU.ISO8859-1',
    'en_ca': 'en_CA.ISO8859-1',
    'en_gb': 'en_GB.ISO8859-1',
    'en_ie': 'en_IE.ISO8859-1',
    'en_nz': 'en_NZ.ISO8859-1',
    'en_uk': 'en_GB.ISO8859-1',
    'en_us': 'en_US.ISO8859-1',
    'eng_gb': 'en_GB.ISO8859-1',
    'english': 'en_EN.ISO8859-1',
    'english_uk': 'en_GB.ISO8859-1',
    'english_united-states': 'en_US.ISO8859-1',
    'english_us': 'en_US.ISO8859-1',
    'es': 'es_ES.ISO8859-1',
    'es_ar': 'es_AR.ISO8859-1',
    'es_bo': 'es_BO.ISO8859-1',
    'es_cl': 'es_CL.ISO8859-1',
    'es_co': 'es_CO.ISO8859-1',
    'es_cr': 'es_CR.ISO8859-1',
    'es_ec': 'es_EC.ISO8859-1',
    'es_es': 'es_ES.ISO8859-1',
    'es_gt': 'es_GT.ISO8859-1',
    'es_mx': 'es_MX.ISO8859-1',
    'es_ni': 'es_NI.ISO8859-1',
    'es_pa': 'es_PA.ISO8859-1',
    'es_pe': 'es_PE.ISO8859-1',
    'es_py': 'es_PY.ISO8859-1',
    'es_sv': 'es_SV.ISO8859-1',
    'es_uy': 'es_UY.ISO8859-1',
    'es_ve': 'es_VE.ISO8859-1',
    'et': 'et_EE.ISO8859-4',
    'et_ee': 'et_EE.ISO8859-4',
    'fi': 'fi_FI.ISO8859-1',
    'fi_fi': 'fi_FI.ISO8859-1',
    'finnish': 'fi_FI.ISO8859-1',
    'fr': 'fr_FR.ISO8859-1',
    'fr_be': 'fr_BE.ISO8859-1',
    'fr_ca': 'fr_CA.ISO8859-1',
    'fr_ch': 'fr_CH.ISO8859-1',
    'fr_fr': 'fr_FR.ISO8859-1',
    'fre_fr': 'fr_FR.ISO8859-1',
    'french': 'fr_FR.ISO8859-1',
    'french_france': 'fr_FR.ISO8859-1',
    'ger_de': 'de_DE.ISO8859-1',
    'german': 'de_DE.ISO8859-1',
    'german_germany': 'de_DE.ISO8859-1',
    'greek': 'el_GR.ISO8859-7',
    'hebrew': 'iw_IL.ISO8859-8',
    'hr': 'hr_HR.ISO8859-2',
    'hr_hr': 'hr_HR.ISO8859-2',
    'hu': 'hu_HU.ISO8859-2',
    'hu_hu': 'hu_HU.ISO8859-2',
    'hungarian': 'hu_HU.ISO8859-2',
    'icelandic': 'is_IS.ISO8859-1',
    'id': 'id_ID.ISO8859-1',
    'id_id': 'id_ID.ISO8859-1',
    'is': 'is_IS.ISO8859-1',
    'is_is': 'is_IS.ISO8859-1',
    'iso-8859-1': 'en_US.ISO8859-1',
    'iso-8859-15': 'en_US.ISO8859-15',
    'iso8859-1': 'en_US.ISO8859-1',
    'iso8859-15': 'en_US.ISO8859-15',
    'iso_8859_1': 'en_US.ISO8859-1',
    'iso_8859_15': 'en_US.ISO8859-15',
    'it': 'it_IT.ISO8859-1',
    'it_ch': 'it_CH.ISO8859-1',
    'it_it': 'it_IT.ISO8859-1',
    'italian': 'it_IT.ISO8859-1',
    'iw': 'iw_IL.ISO8859-8',
    'iw_il': 'iw_IL.ISO8859-8',
    'ja': 'ja_JP.eucJP',
    'ja.jis': 'ja_JP.JIS7',
    'ja.sjis': 'ja_JP.SJIS',
    'ja_jp': 'ja_JP.eucJP',
    'ja_jp.ajec': 'ja_JP.eucJP',
    'ja_jp.euc': 'ja_JP.eucJP',
    'ja_jp.eucjp': 'ja_JP.eucJP',
    'ja_jp.iso-2022-jp': 'ja_JP.JIS7',
    'ja_jp.jis': 'ja_JP.JIS7',
    'ja_jp.jis7': 'ja_JP.JIS7',
    'ja_jp.mscode': 'ja_JP.SJIS',
    'ja_jp.sjis': 'ja_JP.SJIS',
    'ja_jp.ujis': 'ja_JP.eucJP',
    'japan': 'ja_JP.eucJP',
    'japanese': 'ja_JP.SJIS',
    'japanese-euc': 'ja_JP.eucJP',
    'japanese.euc': 'ja_JP.eucJP',
    'jp_jp': 'ja_JP.eucJP',
    'ko': 'ko_KR.eucKR',
    'ko_kr': 'ko_KR.eucKR',
    'ko_kr.euc': 'ko_KR.eucKR',
    'korean': 'ko_KR.eucKR',
    'lt': 'lt_LT.ISO8859-4',
    'lv': 'lv_LV.ISO8859-4',
    'mk': 'mk_MK.ISO8859-5',
    'mk_mk': 'mk_MK.ISO8859-5',
    'nl': 'nl_NL.ISO8859-1',
    'nl_be': 'nl_BE.ISO8859-1',
    'nl_nl': 'nl_NL.ISO8859-1',
    'no': 'no_NO.ISO8859-1',
    'no_no': 'no_NO.ISO8859-1',
    'norwegian': 'no_NO.ISO8859-1',
    'pl': 'pl_PL.ISO8859-2',
    'pl_pl': 'pl_PL.ISO8859-2',
    'polish': 'pl_PL.ISO8859-2',
    'portuguese': 'pt_PT.ISO8859-1',
    'portuguese_brazil': 'pt_BR.ISO8859-1',
    'posix': 'C',
    'posix-utf2': 'C',
    'pt': 'pt_PT.ISO8859-1',
    'pt_br': 'pt_BR.ISO8859-1',
    'pt_pt': 'pt_PT.ISO8859-1',
    'ro': 'ro_RO.ISO8859-2',
    'ro_ro': 'ro_RO.ISO8859-2',
    'ru': 'ru_RU.ISO8859-5',
    'ru_ru': 'ru_RU.ISO8859-5',
    'rumanian': 'ro_RO.ISO8859-2',
    'russian': 'ru_RU.ISO8859-5',
    'serbocroatian': 'sh_YU.ISO8859-2',
    'sh': 'sh_YU.ISO8859-2',
    'sh_hr': 'sh_HR.ISO8859-2',
    'sh_sp': 'sh_YU.ISO8859-2',
    'sh_yu': 'sh_YU.ISO8859-2',
    'sk': 'sk_SK.ISO8859-2',
    'sk_sk': 'sk_SK.ISO8859-2',
    'sl': 'sl_CS.ISO8859-2',
    'sl_cs': 'sl_CS.ISO8859-2',
    'sl_si': 'sl_SI.ISO8859-2',
    'slovak': 'sk_SK.ISO8859-2',
    'slovene': 'sl_CS.ISO8859-2',
    'sp': 'sp_YU.ISO8859-5',
    'sp_yu': 'sp_YU.ISO8859-5',
    'spanish': 'es_ES.ISO8859-1',
    'spanish_spain': 'es_ES.ISO8859-1',
    'sr_sp': 'sr_SP.ISO8859-2',
    'sv': 'sv_SE.ISO8859-1',
    'sv_se': 'sv_SE.ISO8859-1',
    'swedish': 'sv_SE.ISO8859-1',
    'th_th': 'th_TH.TACTIS',
    'tr': 'tr_TR.ISO8859-9',
    'tr_tr': 'tr_TR.ISO8859-9',
    'turkish': 'tr_TR.ISO8859-9',
    'univ': 'en_US.utf',
    'universal': 'en_US.utf',
    'zh': 'zh_CN.eucCN',
    'zh_cn': 'zh_CN.eucCN',
    'zh_cn.big5': 'zh_TW.eucTW',
    'zh_cn.euc': 'zh_CN.eucCN',
    'zh_tw': 'zh_TW.eucTW',
    'zh_tw.euc': 'zh_TW.eucTW',
}
660
#
# this maps windows language identifiers (as used on Windows 95 and
# earlier) to locale strings.
#
# getdefaultlocale() returns these values as they are, without passing
# them through normalize(), so every entry must already be a valid
# language_COUNTRY code (ISO 3166 country part: GB, not UK; JP, not JA).
#
# NOTE: this mapping is incomplete.  If your language is missing, please
# submit a bug report to Python bug manager, which you can find via:
#     http://www.python.org/dev/
# Make sure you include the missing language identifier and the suggested
# locale code.
#

windows_locale = {
    0x0404: "zh_TW", # Chinese (Taiwan)
    0x0804: "zh_CN", # Chinese (PRC)
    0x0406: "da_DK", # Danish
    0x0413: "nl_NL", # Dutch (Netherlands)
    0x0409: "en_US", # English (United States)
    0x0809: "en_GB", # English (United Kingdom)
    0x0c09: "en_AU", # English (Australian)
    0x1009: "en_CA", # English (Canadian)
    0x1409: "en_NZ", # English (New Zealand)
    0x1809: "en_IE", # English (Ireland)
    0x1c09: "en_ZA", # English (South Africa)
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French (Standard)
    0x080c: "fr_BE", # French (Belgian)
    0x0c0c: "fr_CA", # French (Canadian)
    0x100c: "fr_CH", # French (Switzerland)
    0x0407: "de_DE", # German (Standard)
    0x0408: "el_GR", # Greek
    0x040d: "iw_IL", # Hebrew
    0x040f: "is_IS", # Icelandic
    0x0410: "it_IT", # Italian (Standard)
    0x0411: "ja_JP", # Japanese
    0x0414: "no_NO", # Norwegian (Bokmal)
    0x0816: "pt_PT", # Portuguese (Standard)
    0x0c0a: "es_ES", # Spanish (Modern Sort)
    0x0441: "sw_KE", # Swahili (Kenya)
    0x041d: "sv_SE", # Swedish
    0x081d: "sv_FI", # Swedish (Finland)
    0x041f: "tr_TR", # Turkish
}
703
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000704def _print_locale():
705
706 """ Test function.
707 """
708 categories = {}
709 def _init_categories(categories=categories):
710 for k,v in globals().items():
711 if k[:3] == 'LC_':
712 categories[k] = v
713 _init_categories()
714 del categories['LC_ALL']
715
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000716 print 'Locale defaults as determined by getdefaultlocale():'
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000717 print '-'*72
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000718 lang, enc = getdefaultlocale()
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000719 print 'Language: ', lang or '(undefined)'
720 print 'Encoding: ', enc or '(undefined)'
721 print
722
723 print 'Locale settings on startup:'
724 print '-'*72
725 for name,category in categories.items():
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000726 print name, '...'
727 lang, enc = getlocale(category)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000728 print ' Language: ', lang or '(undefined)'
729 print ' Encoding: ', enc or '(undefined)'
730 print
731
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000732 print
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000733 print 'Locale settings after calling resetlocale():'
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000734 print '-'*72
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000735 resetlocale()
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000736 for name,category in categories.items():
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000737 print name, '...'
738 lang, enc = getlocale(category)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000739 print ' Language: ', lang or '(undefined)'
740 print ' Encoding: ', enc or '(undefined)'
741 print
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000742
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000743 try:
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000744 setlocale(LC_ALL, "")
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000745 except:
746 print 'NOTE:'
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000747 print 'setlocale(LC_ALL, "") does not support the default locale'
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000748 print 'given in the OS environment variables.'
749 else:
750 print
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000751 print 'Locale settings after calling setlocale(LC_ALL, ""):'
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000752 print '-'*72
753 for name,category in categories.items():
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000754 print name, '...'
755 lang, enc = getlocale(category)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000756 print ' Language: ', lang or '(undefined)'
757 print ' Encoding: ', enc or '(undefined)'
758 print
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000759
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000760###
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000761
# LC_MESSAGES is non-standard, so it is exported only when the import
# (or the emulation) at the top of the file actually defined it.
if 'LC_MESSAGES' in globals():
    __all__.append("LC_MESSAGES")
768
# Running the module as a script prints a report of the locale aliasing
# results followed by the number formatting self-test.
if __name__=='__main__':
    print 'Locale aliasing:'
    print
    _print_locale()
    print
    print 'Number formatting:'
    print
    _test()