blob: 2028948e9beefd52862ad8d1b1ec40f973ae8b03 [file] [log] [blame]
Marc-André Lemburg5431bc32000-06-07 09:11:40 +00001""" Locale support.
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +00002
Marc-André Lemburg5431bc32000-06-07 09:11:40 +00003 The module provides low-level access to the C lib's locale APIs
4 and adds high level number formatting APIs as well as a locale
5 aliasing engine to complement these.
6
7 The aliasing engine includes support for many commonly used locale
8 names and maps them to values suitable for passing to the C lib's
9 setlocale() function. It also includes default encodings for all
10 supported locale names.
11
12"""
13
Eric S. Raymondbe9b5072001-02-09 10:48:30 +000014import sys
Marc-André Lemburg5431bc32000-06-07 09:11:40 +000015
Fredrik Lundh6c86b992000-07-09 17:12:58 +000016# Try importing the _locale module.
17#
18# If this fails, fall back on a basic 'C' locale emulation.
Guido van Rossumeef1d4e1997-11-19 19:01:43 +000019
Tim Peters1baf8292001-01-24 10:13:46 +000020# Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
21# trying the import. So __all__ is also fiddled at the end of the file.
# Names exported by "from locale import *".  LC_MESSAGES is non-standard
# and is therefore appended at the end of the file, once it is known
# whether it exists.
__all__ = [
    "setlocale", "Error", "localeconv", "strcoll", "strxfrm",
    "format", "str", "atof", "atoi",
    "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", "LC_NUMERIC",
    "LC_ALL", "CHAR_MAX",
]
Skip Montanaro17ab1232001-01-24 06:27:27 +000025
try:

    from _locale import *

except ImportError:

    # Locale emulation: without the C extension only the 'C' locale
    # is available.

    CHAR_MAX = 127

    # Category constants, listed in numeric order.
    LC_CTYPE = 0
    LC_NUMERIC = 1
    LC_TIME = 2
    LC_COLLATE = 3
    LC_MONETARY = 4
    LC_MESSAGES = 5
    LC_ALL = 6

    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.
        """
        # 'C' locale default values; a fresh dictionary is built on
        # every call.
        return {
            # numeric formatting
            'decimal_point': '.',
            'thousands_sep': '',
            'grouping': [127],
            # monetary formatting
            'int_curr_symbol': '',
            'currency_symbol': '',
            'mon_decimal_point': '',
            'mon_thousands_sep': '',
            'mon_grouping': [],
            'positive_sign': '',
            'negative_sign': '',
            'int_frac_digits': 127,
            'frac_digits': 127,
            'p_cs_precedes': 127,
            'p_sep_by_space': 127,
            'n_cs_precedes': 127,
            'n_sep_by_space': 127,
            'p_sign_posn': 127,
            'n_sign_posn': 127,
        }

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation accepts only None, '' and 'C' as value and
            always answers 'C'; anything else raises Error.
        """
        if value in (None, '', 'C'):
            return 'C'
        raise Error('_locale emulation only supports "C" locale')

    def strcoll(a, b):
        """ strcoll(string,string) -> int.
            Compares two strings according to the locale.
        """
        # In the 'C' locale this is the plain comparison.
        return cmp(a, b)

    def strxfrm(s):
        """ strxfrm(string) -> string.
            Returns a string that behaves for cmp locale-aware.
        """
        # Nothing to transform in the 'C' locale.
        return s
Marc-André Lemburg5431bc32000-06-07 09:11:40 +000087
88### Number formatting APIs
89
90# Author: Martin von Loewis
Guido van Rossumeef1d4e1997-11-19 19:01:43 +000091
92#perform the grouping from right to left
def _group(s):
    """ Group the digits of s according to the current locale.

        s is the integer part of a formatted number; it may carry
        leading spaces and a sign, or trailing spaces.  The digits are
        grouped from right to left using the 'grouping' and
        'thousands_sep' entries of localeconv().

        Returns a tuple (grouped string, number of separators
        inserted).  An empty s, or a locale without grouping
        information, yields (s, 0).
    """
    conv = localeconv()
    grouping = conv['grouping']
    # Nothing to group: either the input is empty (indexing s[-1]
    # below would fail) or the locale defines no grouping.
    if not s or not grouping:
        return (s, 0)
    result = ""
    seps = 0
    spaces = ""
    if s[-1] == ' ':
        # Set the padding aside; it is re-appended to the result.
        sp = s.find(' ')
        spaces = s[sp:]
        s = s[:sp]
    # Size of the group currently being cut off.  It stays 0 until the
    # grouping list has supplied a real size.
    group = 0
    while s and grouping:
        # CHAR_MAX: no further grouping is to be performed
        if grouping[0] == CHAR_MAX:
            break
        # 0: re-use last group ad infinitum
        elif grouping[0] != 0:
            # process last group
            group = grouping[0]
            grouping = grouping[1:]
        if group <= 0:
            # A leading 0 has no previous group size to repeat.
            break
        if result:
            result = s[-group:] + conv['thousands_sep'] + result
            seps += 1
        else:
            result = s[-group:]
        s = s[:-group]
        if s and s[-1] not in "0123456789":
            # the leading string is only spaces and signs
            return s + result + spaces, seps
    if not result:
        return s + spaces, seps
    if s:
        # Digits left over once the grouping list is used up (or
        # CHAR_MAX was hit) form one final, unsplit group.
        result = s + conv['thousands_sep'] + result
        seps += 1
    return result + spaces, seps
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000128
def format(f, val, grouping=0):
    """Format val with the %-style format f, taking the current locale
    into account.

    The '.' produced by the % operator is replaced by the locale's
    decimal point.  If grouping is true, the part before the decimal
    point is also grouped with the locale's thousands separator.

    Raises Error if the formatted string contains more than one '.'.
    """
    pieces = (f % val).split(".")
    added = 0
    if grouping:
        pieces[0], added = _group(pieces[0])

    count = len(pieces)
    if count > 2:
        raise Error("Too many decimal points in result string")
    if count == 2:
        text = pieces[0] + localeconv()['decimal_point'] + pieces[1]
    else:
        text = pieces[0]

    # If the number was formatted for a specific width, it may have
    # been padded with spaces on the left or right.  Drop one space
    # for every separator that was inserted, stopping early when no
    # spaces are left.  Leading zeroes used as fillers are not dealt
    # with, as it is not clear how they should interact with grouping.
    for _unused in range(added):
        gap = text.find(" ")
        if gap == -1:
            break
        text = text[:gap] + text[gap + 1:]

    return text
Martin v. Löwisdb786872001-01-21 18:52:33 +0000157
def str(val):
    """Convert float to string, taking the locale into account.

    The value is formatted with "%.12g" through format(), so the
    locale's decimal point is used; no grouping is applied.
    """
    return format("%.12g", val)
161
def atof(str, func=float):
    """Parses a string as a float according to the locale settings.

    func is the converter applied to the cleaned-up string; it
    defaults to float.
    """
    conv = localeconv()
    # First, get rid of the grouping
    thousands = conv['thousands_sep']
    if thousands:
        str = str.replace(thousands, "")
    # next, replace the decimal point with a dot
    point = conv['decimal_point']
    if point:
        str = str.replace(point, ".")
    # finally, parse the string
    return func(str)
176
def atoi(str):
    """Converts a string to an integer according to the locale settings.

    Delegates to atof() with int as the converter.
    """
    return atof(str, func=int)
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000180
def _test():
    """ Small demonstration of the number formatting APIs; switches
        to the user's default locale and writes to sys.stdout.
    """
    emit = sys.stdout.write
    setlocale(LC_ALL, "")
    # do grouping
    grouped = format("%d", 123456789, 1)
    emit("%s %s %s\n" % (grouped, "is", atoi(grouped)))
    # standard formatting
    plain = str(3.14)
    emit("%s %s %s\n" % (plain, "is", atof(plain)))
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000189
190### Locale name aliasing engine
191
192# Author: Marc-Andre Lemburg, mal@lemburg.com
Fredrik Lundh37a09822002-10-19 20:19:10 +0000193# Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000194
195# store away the low-level version of setlocale (it's
196# overridden below)
197_setlocale = setlocale
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000198
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, it is passed through
        as given (lowercased); if no encoding is given, the alias
        table's default encoding for the language is used.

    """
    # Lowercase the name; ':' is sometimes used as encoding delimiter,
    # so treat it like '.'.
    lowered = localename.lower().replace(':', '.')

    # Split off the encoding; anything after a second '.' is dropped.
    pieces = lowered.split('.')
    if len(pieces) > 1:
        langname, encoding = pieces[0], pieces[1]
        lookup_key = langname + '.' + encoding
    else:
        langname, encoding = lowered, ''
        lookup_key = lowered

    # First lookup: full name (possibly with encoding)
    alias = locale_alias.get(lookup_key, None)
    if alias is not None:
        return alias

    # Second try: language name (without encoding)
    alias = locale_alias.get(langname, None)
    if alias is None:
        return localename

    if '.' in alias:
        langname, defenc = alias.split('.')
    else:
        langname, defenc = alias, ''

    # An explicitly given encoding wins over the table's default.
    if encoding:
        encoding = encoding_alias.get(encoding, encoding)
    else:
        encoding = defenc

    if not encoding:
        return langname
    return langname + '.' + encoding
251
def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)
    if '@' in code:
        # Deal with locale modifiers.  The normalized code is tested
        # rather than the raw name, because normalization can drop the
        # modifier (the encoding alias 'utf8@ucs4' maps to plain
        # 'utf'); splitting a code without '@' would fail.
        code, modifier = code.split('@', 1)
        if modifier == 'euro' and '.' not in code:
            # Assume Latin-9 for @euro locales. This is bogus,
            # since some systems may use other encodings for these
            # locales. Also, we ignore other modifiers.
            return code, 'iso-8859-15'

    if '.' in code:
        # Return a tuple, as documented, not the list split() yields.
        return tuple(code.split('.')[:2])
    elif code == 'C':
        return None, None
    raise ValueError('unknown locale: %s' % localename)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000281
def _build_localename(localetuple):

    """ Builds a locale code from the given tuple (language code,
        encoding).

        A language code of None stands for 'C'; an encoding of None
        means that no '.encoding' suffix is appended.

        No aliasing or normalizing takes place.

    """
    lang, enc = localetuple
    if lang is None:
        lang = 'C'
    if enc is not None:
        return lang + '.' + enc
    return lang
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000297
def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first one found to be defined and
        non-empty will be used. envvars defaults to the search path
        used in GNU gettext; it must always contain the variable
        name 'LANG'.  The value of LANGUAGE is treated as a colon
        separated list of which only the first entry is used.

        Except for the code 'C', the language code corresponds to RFC
        1766. code and encoding can be None in case the values cannot
        be determined.

        A ValueError is raised if the name found in the environment
        cannot be parsed.

    """

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        if sys.platform == "win32" and code and code[:2] == "0x":
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    lookup = os.environ.get
    for variable in envvars:
        localename = lookup(variable, None)
        # A variable set to the empty string counts as unset;
        # otherwise '' would be handed on and rejected as an unknown
        # locale.
        if localename:
            if variable == 'LANGUAGE':
                # LANGUAGE holds a colon separated priority list of
                # languages; use the preferred (first) one.
                localename = localename.split(':')[0]
            break
    else:
        localename = 'C'
    return _parse_localename(localename)
347
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000348
def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* values and defaults to
        LC_CTYPE.  For LC_ALL a TypeError is raised if the current
        setting is a composite one (contains ';').

        Except for the code 'C', the language code corresponds to RFC
        1766. code and encoding can be None in case the values cannot
        be determined.

    """
    # Query the low-level setlocale() without changing anything.
    current = _setlocale(category)
    if category == LC_ALL:
        if ';' in current:
            raise TypeError('category LC_ALL is not supported')
    return _parse_localename(current)
366
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, a locale tuple (language code, encoding), or None.

        Locale tuples are converted to strings using the locale
        aliasing engine.  Locale strings are passed directly to the
        C lib.

        category may be given as one of the LC_* values.

        Returns whatever the low-level setlocale() returns.

    """
    is_plain_string = type(locale) is type("")
    if locale and not is_plain_string:
        # convert the tuple to a normalized locale string
        locale = normalize(_build_localename(locale))
    return _setlocale(category, locale)
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000382
def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        The default setting is determined by calling
        getdefaultlocale().  category defaults to LC_ALL.

    """
    default = getdefaultlocale()
    _setlocale(category, _build_localename(default))
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000392
if sys.platform in ('win32', 'darwin', 'mac'):
    # On Win32, this will return the ANSI code page
    # On the Mac, it should return the system encoding;
    # it might return "ascii" instead
    def getpreferredencoding(do_setlocale = True):
        """Return the charset that the user is likely using.

        do_setlocale is accepted for interface compatibility and is
        not used on this platform."""
        import _locale
        return _locale._getdefaultlocale()[1]
else:
    # On Unix, if CODESET is available, use that.
    try:
        CODESET
    except NameError:
        # Fall back to parsing environment variables :-(
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            by looking at environment variables.

            do_setlocale is accepted for interface compatibility and
            is not used by this fallback."""
            return getdefaultlocale()[1]
    else:
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            according to the system configuration.

            If do_setlocale is true, LC_CTYPE is temporarily switched
            to the user's default locale for the query and restored
            afterwards, also when the query fails.  Otherwise the
            current LC_CTYPE setting is queried as it is."""
            if not do_setlocale:
                return nl_langinfo(CODESET)
            oldloc = setlocale(LC_CTYPE)
            try:
                setlocale(LC_CTYPE, "")
                return nl_langinfo(CODESET)
            finally:
                # Never leave the process in the temporary locale.
                setlocale(LC_CTYPE, oldloc)
Tim Peters230a60c2002-11-09 05:08:07 +0000423
Martin v. Löwisf0a46682002-11-03 17:20:12 +0000424
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000425### Database
426#
427# The following data was extracted from the locale.alias file which
428# comes with X11 and then hand edited removing the explicit encoding
429# definitions and adding some more aliases. The file is usually
430# available as /usr/lib/X11/locale/locale.alias.
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000431#
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000432
433#
434# The encoding_alias table maps lowercase encoding alias names to C
435# locale encoding names (case-sensitive).
436#
encoding_alias = {
    # plain C / code page 437
    '437':          'C',
    'c':            'C',
    # Latin-1
    'iso8859':      'ISO8859-1',
    '8859':         'ISO8859-1',
    '88591':        'ISO8859-1',
    'ascii':        'ISO8859-1',
    'en':           'ISO8859-1',
    'iso88591':     'ISO8859-1',
    'iso_8859-1':   'ISO8859-1',
    # Latin-9
    '885915':       'ISO8859-15',
    'iso885915':    'ISO8859-15',
    'iso_8859-15':  'ISO8859-15',
    # other ISO 8859 parts
    'iso8859-2':    'ISO8859-2',
    'iso88592':     'ISO8859-2',
    'iso_8859-2':   'ISO8859-2',
    'iso88595':     'ISO8859-5',
    'iso88596':     'ISO8859-6',
    'iso88597':     'ISO8859-7',
    'iso88598':     'ISO8859-8',
    'iso88599':     'ISO8859-9',
    # Japanese
    'iso-2022-jp':  'JIS7',
    'jis':          'JIS7',
    'jis7':         'JIS7',
    'sjis':         'SJIS',
    # Thai
    'tis620':       'TACTIS',
    # Japanese EUC
    'ajec':         'eucJP',
    'eucjp':        'eucJP',
    'ujis':         'eucJP',
    # UTF-8
    'utf-8':        'utf',
    'utf8':         'utf',
    'utf8@ucs4':    'utf',
}
470
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000471#
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000472# The locale_alias table maps lowercase alias names to C locale names
473# (case-sensitive). Encodings are always separated from the locale
474# name using a dot ('.'); they should only be given in case the
475# language name is needed to interpret the given encoding alias
476# correctly (CJK codes often have this need).
477#
# Keys are lowercase alias names, values are C locale names.
#
# Corrections against the table as extracted: the Czech aliases
# ('czech', 'cz', 'cz_cz') map to cs_CZ like 'cs' and 'cs_cz' do
# ('cz' is not a language code and cs_CS is not a territory of the
# 'cs' entries), and 'dutch' maps to nl_NL like 'nl' does.
locale_alias = {
    'american': 'en_US.ISO8859-1',
    'ar': 'ar_AA.ISO8859-6',
    'ar_aa': 'ar_AA.ISO8859-6',
    'ar_sa': 'ar_SA.ISO8859-6',
    'arabic': 'ar_AA.ISO8859-6',
    'bg': 'bg_BG.ISO8859-5',
    'bg_bg': 'bg_BG.ISO8859-5',
    'bulgarian': 'bg_BG.ISO8859-5',
    'c-french': 'fr_CA.ISO8859-1',
    'c': 'C',
    'c_c': 'C',
    'cextend': 'en_US.ISO8859-1',
    'chinese-s': 'zh_CN.eucCN',
    'chinese-t': 'zh_TW.eucTW',
    'croatian': 'hr_HR.ISO8859-2',
    'cs': 'cs_CZ.ISO8859-2',
    'cs_cs': 'cs_CZ.ISO8859-2',
    'cs_cz': 'cs_CZ.ISO8859-2',
    'cz': 'cs_CZ.ISO8859-2',
    'cz_cz': 'cs_CZ.ISO8859-2',
    'czech': 'cs_CZ.ISO8859-2',
    'da': 'da_DK.ISO8859-1',
    'da_dk': 'da_DK.ISO8859-1',
    'danish': 'da_DK.ISO8859-1',
    'de': 'de_DE.ISO8859-1',
    'de_at': 'de_AT.ISO8859-1',
    'de_ch': 'de_CH.ISO8859-1',
    'de_de': 'de_DE.ISO8859-1',
    'dutch': 'nl_NL.ISO8859-1',
    'ee': 'ee_EE.ISO8859-4',
    'el': 'el_GR.ISO8859-7',
    'el_gr': 'el_GR.ISO8859-7',
    'en': 'en_US.ISO8859-1',
    'en_au': 'en_AU.ISO8859-1',
    'en_ca': 'en_CA.ISO8859-1',
    'en_gb': 'en_GB.ISO8859-1',
    'en_ie': 'en_IE.ISO8859-1',
    'en_nz': 'en_NZ.ISO8859-1',
    'en_uk': 'en_GB.ISO8859-1',
    'en_us': 'en_US.ISO8859-1',
    'eng_gb': 'en_GB.ISO8859-1',
    'english': 'en_EN.ISO8859-1',
    'english_uk': 'en_GB.ISO8859-1',
    'english_united-states': 'en_US.ISO8859-1',
    'english_us': 'en_US.ISO8859-1',
    'es': 'es_ES.ISO8859-1',
    'es_ar': 'es_AR.ISO8859-1',
    'es_bo': 'es_BO.ISO8859-1',
    'es_cl': 'es_CL.ISO8859-1',
    'es_co': 'es_CO.ISO8859-1',
    'es_cr': 'es_CR.ISO8859-1',
    'es_ec': 'es_EC.ISO8859-1',
    'es_es': 'es_ES.ISO8859-1',
    'es_gt': 'es_GT.ISO8859-1',
    'es_mx': 'es_MX.ISO8859-1',
    'es_ni': 'es_NI.ISO8859-1',
    'es_pa': 'es_PA.ISO8859-1',
    'es_pe': 'es_PE.ISO8859-1',
    'es_py': 'es_PY.ISO8859-1',
    'es_sv': 'es_SV.ISO8859-1',
    'es_uy': 'es_UY.ISO8859-1',
    'es_ve': 'es_VE.ISO8859-1',
    'et': 'et_EE.ISO8859-4',
    'et_ee': 'et_EE.ISO8859-4',
    'fi': 'fi_FI.ISO8859-1',
    'fi_fi': 'fi_FI.ISO8859-1',
    'finnish': 'fi_FI.ISO8859-1',
    'fr': 'fr_FR.ISO8859-1',
    'fr_be': 'fr_BE.ISO8859-1',
    'fr_ca': 'fr_CA.ISO8859-1',
    'fr_ch': 'fr_CH.ISO8859-1',
    'fr_fr': 'fr_FR.ISO8859-1',
    'fre_fr': 'fr_FR.ISO8859-1',
    'french': 'fr_FR.ISO8859-1',
    'french_france': 'fr_FR.ISO8859-1',
    'ger_de': 'de_DE.ISO8859-1',
    'german': 'de_DE.ISO8859-1',
    'german_germany': 'de_DE.ISO8859-1',
    'greek': 'el_GR.ISO8859-7',
    'hebrew': 'iw_IL.ISO8859-8',
    'hr': 'hr_HR.ISO8859-2',
    'hr_hr': 'hr_HR.ISO8859-2',
    'hu': 'hu_HU.ISO8859-2',
    'hu_hu': 'hu_HU.ISO8859-2',
    'hungarian': 'hu_HU.ISO8859-2',
    'icelandic': 'is_IS.ISO8859-1',
    'id': 'id_ID.ISO8859-1',
    'id_id': 'id_ID.ISO8859-1',
    'is': 'is_IS.ISO8859-1',
    'is_is': 'is_IS.ISO8859-1',
    'iso-8859-1': 'en_US.ISO8859-1',
    'iso-8859-15': 'en_US.ISO8859-15',
    'iso8859-1': 'en_US.ISO8859-1',
    'iso8859-15': 'en_US.ISO8859-15',
    'iso_8859_1': 'en_US.ISO8859-1',
    'iso_8859_15': 'en_US.ISO8859-15',
    'it': 'it_IT.ISO8859-1',
    'it_ch': 'it_CH.ISO8859-1',
    'it_it': 'it_IT.ISO8859-1',
    'italian': 'it_IT.ISO8859-1',
    'iw': 'iw_IL.ISO8859-8',
    'iw_il': 'iw_IL.ISO8859-8',
    'ja': 'ja_JP.eucJP',
    'ja.jis': 'ja_JP.JIS7',
    'ja.sjis': 'ja_JP.SJIS',
    'ja_jp': 'ja_JP.eucJP',
    'ja_jp.ajec': 'ja_JP.eucJP',
    'ja_jp.euc': 'ja_JP.eucJP',
    'ja_jp.eucjp': 'ja_JP.eucJP',
    'ja_jp.iso-2022-jp': 'ja_JP.JIS7',
    'ja_jp.jis': 'ja_JP.JIS7',
    'ja_jp.jis7': 'ja_JP.JIS7',
    'ja_jp.mscode': 'ja_JP.SJIS',
    'ja_jp.sjis': 'ja_JP.SJIS',
    'ja_jp.ujis': 'ja_JP.eucJP',
    'japan': 'ja_JP.eucJP',
    'japanese': 'ja_JP.SJIS',
    'japanese-euc': 'ja_JP.eucJP',
    'japanese.euc': 'ja_JP.eucJP',
    'jp_jp': 'ja_JP.eucJP',
    'ko': 'ko_KR.eucKR',
    'ko_kr': 'ko_KR.eucKR',
    'ko_kr.euc': 'ko_KR.eucKR',
    'korean': 'ko_KR.eucKR',
    'lt': 'lt_LT.ISO8859-4',
    'lv': 'lv_LV.ISO8859-4',
    'mk': 'mk_MK.ISO8859-5',
    'mk_mk': 'mk_MK.ISO8859-5',
    'nl': 'nl_NL.ISO8859-1',
    'nl_be': 'nl_BE.ISO8859-1',
    'nl_nl': 'nl_NL.ISO8859-1',
    'no': 'no_NO.ISO8859-1',
    'no_no': 'no_NO.ISO8859-1',
    'norwegian': 'no_NO.ISO8859-1',
    'pl': 'pl_PL.ISO8859-2',
    'pl_pl': 'pl_PL.ISO8859-2',
    'polish': 'pl_PL.ISO8859-2',
    'portuguese': 'pt_PT.ISO8859-1',
    'portuguese_brazil': 'pt_BR.ISO8859-1',
    'posix': 'C',
    'posix-utf2': 'C',
    'pt': 'pt_PT.ISO8859-1',
    'pt_br': 'pt_BR.ISO8859-1',
    'pt_pt': 'pt_PT.ISO8859-1',
    'ro': 'ro_RO.ISO8859-2',
    'ro_ro': 'ro_RO.ISO8859-2',
    'ru': 'ru_RU.ISO8859-5',
    'ru_ru': 'ru_RU.ISO8859-5',
    'rumanian': 'ro_RO.ISO8859-2',
    'russian': 'ru_RU.ISO8859-5',
    'serbocroatian': 'sh_YU.ISO8859-2',
    'sh': 'sh_YU.ISO8859-2',
    'sh_hr': 'sh_HR.ISO8859-2',
    'sh_sp': 'sh_YU.ISO8859-2',
    'sh_yu': 'sh_YU.ISO8859-2',
    'sk': 'sk_SK.ISO8859-2',
    'sk_sk': 'sk_SK.ISO8859-2',
    'sl': 'sl_CS.ISO8859-2',
    'sl_cs': 'sl_CS.ISO8859-2',
    'sl_si': 'sl_SI.ISO8859-2',
    'slovak': 'sk_SK.ISO8859-2',
    'slovene': 'sl_CS.ISO8859-2',
    'sp': 'sp_YU.ISO8859-5',
    'sp_yu': 'sp_YU.ISO8859-5',
    'spanish': 'es_ES.ISO8859-1',
    'spanish_spain': 'es_ES.ISO8859-1',
    'sr_sp': 'sr_SP.ISO8859-2',
    'sv': 'sv_SE.ISO8859-1',
    'sv_se': 'sv_SE.ISO8859-1',
    'swedish': 'sv_SE.ISO8859-1',
    'th_th': 'th_TH.TACTIS',
    'tr': 'tr_TR.ISO8859-9',
    'tr_tr': 'tr_TR.ISO8859-9',
    'turkish': 'tr_TR.ISO8859-9',
    'univ': 'en_US.utf',
    'universal': 'en_US.utf',
    'zh': 'zh_CN.eucCN',
    'zh_cn': 'zh_CN.eucCN',
    'zh_cn.big5': 'zh_TW.eucTW',
    'zh_cn.euc': 'zh_CN.eucCN',
    'zh_tw': 'zh_TW.eucTW',
    'zh_tw.euc': 'zh_TW.eucTW',
}
662
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000663#
664# this maps windows language identifiers (as used on Windows 95 and
665# earlier) to locale strings.
666#
Fredrik Lundh37a09822002-10-19 20:19:10 +0000667# NOTE: this mapping is incomplete. If your language is missing, please
668# submit a bug report to Python bug manager, which you can find via:
669# http://www.python.org/dev/
670# Make sure you include the missing language identifier and the suggested
671# locale code.
672#
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000673
# Keys are Windows language identifiers, values are locale codes of the
# form language_TERRITORY.  The United Kingdom is 'GB' and Japan is
# 'JP'; the 'en_UK' and 'ja_JA' codes that used to stand here are not
# valid locale codes.
windows_locale = {
    0x0404: "zh_TW", # Chinese (Taiwan)
    0x0804: "zh_CN", # Chinese (PRC)
    0x0406: "da_DK", # Danish
    0x0413: "nl_NL", # Dutch (Netherlands)
    0x0409: "en_US", # English (United States)
    0x0809: "en_GB", # English (United Kingdom)
    0x0c09: "en_AU", # English (Australian)
    0x1009: "en_CA", # English (Canadian)
    0x1409: "en_NZ", # English (New Zealand)
    0x1809: "en_IE", # English (Ireland)
    0x1c09: "en_ZA", # English (South Africa)
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French (Standard)
    0x080c: "fr_BE", # French (Belgian)
    0x0c0c: "fr_CA", # French (Canadian)
    0x100c: "fr_CH", # French (Switzerland)
    0x0407: "de_DE", # German (Standard)
    0x0408: "el_GR", # Greek
    0x040d: "iw_IL", # Hebrew
    0x040f: "is_IS", # Icelandic
    0x0410: "it_IT", # Italian (Standard)
    0x0411: "ja_JP", # Japanese
    0x0414: "no_NO", # Norwegian (Bokmal)
    0x0816: "pt_PT", # Portuguese (Standard)
    0x0c0a: "es_ES", # Spanish (Modern Sort)
    0x0441: "sw_KE", # Swahili (Kenya)
    0x041d: "sv_SE", # Swedish
    0x081d: "sv_FI", # Swedish (Finland)
    0x041f: "tr_TR", # Turkish
}
705
def _print_locale():

    """ Test function.

        Writes a report to sys.stdout: the defaults found by
        getdefaultlocale(), then the per-category settings on startup,
        after resetlocale() and after setlocale(LC_ALL, "").
    """
    def _say(*items):
        # Items separated by single spaces, terminated by a newline.
        sys.stdout.write(' '.join(['%s' % (item,) for item in items]) + '\n')

    # Collect every LC_* constant of this module except LC_ALL, which
    # getlocale() does not support in general.
    categories = {}
    for key, value in globals().items():
        if key[:3] == 'LC_':
            categories[key] = value
    del categories['LC_ALL']

    def _say_categories():
        # One language/encoding entry per category.
        for name, category in categories.items():
            _say(name, '...')
            lang, enc = getlocale(category)
            _say(' Language: ', lang or '(undefined)')
            _say(' Encoding: ', enc or '(undefined)')
            _say()

    _say('Locale defaults as determined by getdefaultlocale():')
    _say('-'*72)
    lang, enc = getdefaultlocale()
    _say('Language: ', lang or '(undefined)')
    _say('Encoding: ', enc or '(undefined)')
    _say()

    _say('Locale settings on startup:')
    _say('-'*72)
    _say_categories()

    _say()
    _say('Locale settings after calling resetlocale():')
    _say('-'*72)
    resetlocale()
    _say_categories()

    try:
        setlocale(LC_ALL, "")
    except Error:
        # Only a rejected locale is expected here; anything else
        # (KeyboardInterrupt, programming errors) must not be hidden.
        _say('NOTE:')
        _say('setlocale(LC_ALL, "") does not support the default locale')
        _say('given in the OS environment variables.')
    else:
        _say()
        _say('Locale settings after calling setlocale(LC_ALL, ""):')
        _say('-'*72)
        _say_categories()
Fredrik Lundh6c86b992000-07-09 17:12:58 +0000761
Marc-André Lemburg5431bc32000-06-07 09:11:40 +0000762###
Guido van Rossumeef1d4e1997-11-19 19:01:43 +0000763
# LC_MESSAGES is non-standard: export it only where it actually exists.
if 'LC_MESSAGES' in globals():
    __all__.append("LC_MESSAGES")
770
if __name__ == '__main__':
    # Run both self-tests, each under its own heading followed by a
    # blank line.
    sys.stdout.write('Locale aliasing:' + '\n' + '\n')
    _print_locale()
    sys.stdout.write('\n' + 'Number formatting:' + '\n' + '\n')
    _test()