Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 1 | """ Locale support. |
Guido van Rossum | 4b8c6ea | 2000-02-04 15:39:30 +0000 | [diff] [blame] | 2 | |
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 3 | The module provides low-level access to the C lib's locale APIs |
| 4 | and adds high level number formatting APIs as well as a locale |
| 5 | aliasing engine to complement these. |
| 6 | |
| 7 | The aliasing engine includes support for many commonly used locale |
| 8 | names and maps them to values suitable for passing to the C lib's |
| 9 | setlocale() function. It also includes default encodings for all |
| 10 | supported locale names. |
| 11 | |
| 12 | """ |
| 13 | |
Eric S. Raymond | be9b507 | 2001-02-09 10:48:30 +0000 | [diff] [blame] | 14 | import sys |
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 15 | |
Fredrik Lundh | 6c86b99 | 2000-07-09 17:12:58 +0000 | [diff] [blame] | 16 | # Try importing the _locale module. |
| 17 | # |
| 18 | # If this fails, fall back on a basic 'C' locale emulation. |
Guido van Rossum | eef1d4e | 1997-11-19 19:01:43 +0000 | [diff] [blame] | 19 | |
# LC_MESSAGES is not part of the C standard, so whether it exists is only
# known once the import below has been attempted.  For that reason __all__
# is adjusted once more at the end of the file.
__all__ = [
    "setlocale", "Error", "localeconv", "strcoll", "strxfrm",
    "format", "str", "atof", "atoi",
    "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", "LC_NUMERIC",
    "LC_ALL", "CHAR_MAX",
]
Skip Montanaro | 17ab123 | 2001-01-24 06:27:27 +0000 | [diff] [blame] | 25 | |
try:

    # Use the C implementation whenever the interpreter provides it.
    from _locale import *

except ImportError:

    # Pure-Python stand-in: it only knows the portable 'C' locale.

    Error = ValueError

    CHAR_MAX = 127

    # Category constants, listed in numeric order.
    LC_CTYPE = 0
    LC_NUMERIC = 1
    LC_TIME = 2
    LC_COLLATE = 3
    LC_MONETARY = 4
    LC_MESSAGES = 5
    LC_ALL = 6

    def localeconv():
        """ localeconv() -> dict.
            Returns the numeric and monetary formatting parameters of
            the 'C' locale.  A new dictionary is built on every call.
        """
        return {
            # numeric formatting
            'decimal_point': '.',
            'thousands_sep': '',
            'grouping': [127],
            # monetary formatting: symbols and separators
            'int_curr_symbol': '',
            'currency_symbol': '',
            'mon_decimal_point': '',
            'mon_thousands_sep': '',
            'mon_grouping': [],
            'positive_sign': '',
            'negative_sign': '',
            # monetary formatting: numeric fields
            'int_frac_digits': 127,
            'frac_digits': 127,
            'p_cs_precedes': 127,
            'p_sep_by_space': 127,
            'n_cs_precedes': 127,
            'n_sep_by_space': 127,
            'p_sign_posn': 127,
            'n_sign_posn': 127,
        }

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Queries the locale (value is None) or activates it.  The
            emulation accepts only 'C' and always answers 'C'; any
            other value raises Error.
        """
        if value is None or value == 'C':
            return 'C'
        raise Error('_locale emulation only supports "C" locale')

    def strcoll(a,b):
        """ strcoll(string,string) -> int.
            Compares two strings; the emulation uses plain cmp().
        """
        return cmp(a, b)

    def strxfrm(s):
        """ strxfrm(string) -> string.
            Returns a string usable as a comparison key; the emulation
            hands the argument back unchanged.
        """
        return s
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 87 | |
| 88 | ### Number formatting APIs |
| 89 | |
| 90 | # Author: Martin von Loewis |
Guido van Rossum | eef1d4e | 1997-11-19 19:01:43 +0000 | [diff] [blame] | 91 | |
| 92 | #perform the grouping from right to left |
| 93 | def _group(s): |
| 94 | conv=localeconv() |
| 95 | grouping=conv['grouping'] |
Guido van Rossum | 67addfe | 2001-04-16 16:04:10 +0000 | [diff] [blame] | 96 | if not grouping:return (s, 0) |
Guido van Rossum | eef1d4e | 1997-11-19 19:01:43 +0000 | [diff] [blame] | 97 | result="" |
Martin v. Löwis | 88ad12a | 2001-04-13 08:09:50 +0000 | [diff] [blame] | 98 | seps = 0 |
| 99 | spaces = "" |
| 100 | if s[-1] == ' ': |
| 101 | sp = s.find(' ') |
| 102 | spaces = s[sp:] |
| 103 | s = s[:sp] |
Guido van Rossum | eef1d4e | 1997-11-19 19:01:43 +0000 | [diff] [blame] | 104 | while s and grouping: |
Fredrik Lundh | 6c86b99 | 2000-07-09 17:12:58 +0000 | [diff] [blame] | 105 | # if grouping is -1, we are done |
Guido van Rossum | 45e2fbc | 1998-03-26 21:13:24 +0000 | [diff] [blame] | 106 | if grouping[0]==CHAR_MAX: |
| 107 | break |
| 108 | # 0: re-use last group ad infinitum |
| 109 | elif grouping[0]!=0: |
| 110 | #process last group |
| 111 | group=grouping[0] |
| 112 | grouping=grouping[1:] |
| 113 | if result: |
| 114 | result=s[-group:]+conv['thousands_sep']+result |
Martin v. Löwis | 88ad12a | 2001-04-13 08:09:50 +0000 | [diff] [blame] | 115 | seps += 1 |
Guido van Rossum | 45e2fbc | 1998-03-26 21:13:24 +0000 | [diff] [blame] | 116 | else: |
| 117 | result=s[-group:] |
| 118 | s=s[:-group] |
Martin v. Löwis | 88ad12a | 2001-04-13 08:09:50 +0000 | [diff] [blame] | 119 | if s and s[-1] not in "0123456789": |
| 120 | # the leading string is only spaces and signs |
| 121 | return s+result+spaces,seps |
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 122 | if not result: |
Martin v. Löwis | 88ad12a | 2001-04-13 08:09:50 +0000 | [diff] [blame] | 123 | return s+spaces,seps |
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 124 | if s: |
Guido van Rossum | 45e2fbc | 1998-03-26 21:13:24 +0000 | [diff] [blame] | 125 | result=s+conv['thousands_sep']+result |
Martin v. Löwis | 88ad12a | 2001-04-13 08:09:50 +0000 | [diff] [blame] | 126 | seps += 1 |
| 127 | return result+spaces,seps |
Guido van Rossum | eef1d4e | 1997-11-19 19:01:43 +0000 | [diff] [blame] | 128 | |
def format(f,val,grouping=0):
    """Format val with the %-style format f, honouring the locale.

    The decimal point produced by the % operator is replaced by the
    locale's decimal point.  If grouping is true, the integer part is
    additionally grouped with the locale's thousands separator.

    Raises Error if the formatted text contains more than one '.'.
    """
    pieces = (f % val).split(".")
    removable = 0
    if grouping:
        pieces[0], removable = _group(pieces[0])

    count = len(pieces)
    if count > 2:
        raise Error("Too many decimal points in result string")
    if count == 2:
        text = pieces[0] + localeconv()['decimal_point'] + pieces[1]
    else:
        text = pieces[0]

    # A number formatted for a specific width may have been padded with
    # spaces on the left or right.  Remove one space for every
    # separator that was inserted, as long as spaces remain.  Leading
    # zeroes used as fillers are not dealt with, as it is not clear how
    # they should interact with grouping.
    for _ in range(removable):
        pos = text.find(" ")
        if pos == -1:
            break
        text = text[:pos] + text[pos + 1:]

    return text
Martin v. Löwis | db78687 | 2001-01-21 18:52:33 +0000 | [diff] [blame] | 157 | |
def str(val):
    """Convert a float to a string, taking the locale into account.

    The value is formatted with "%.12g" and the decimal point is
    replaced by the one of the current locale; no grouping is applied.
    """
    return format("%.12g", val)
| 161 | |
def atof(str,func=float):
    """Parse a string as a number according to the locale settings.

    The locale's thousands separator is removed and its decimal point
    is turned into '.', then the result is handed to func (float by
    default) whose return value is returned.
    """
    conv = localeconv()
    text = str
    # drop the grouping characters, if the locale uses any
    separator = conv['thousands_sep']
    if separator:
        text = text.replace(separator, "")
    # make the decimal point the one func understands
    point = conv['decimal_point']
    if point:
        text = text.replace(point, ".")
    return func(text)
| 176 | |
def atoi(str):
    """Parse a string as an integer according to the locale settings.

    Same processing as atof(), with int as the final conversion.
    """
    return atof(str, func=int)
Guido van Rossum | eef1d4e | 1997-11-19 19:01:43 +0000 | [diff] [blame] | 180 | |
def _test():
    """Print a grouped integer and a float, each with its parsed value.

    Switches LC_ALL to the user's default locale first.
    """
    setlocale(LC_ALL, "")
    # integer formatted with grouping, then parsed back
    grouped = format("%d", 123456789, 1)
    print("%s is %s" % (grouped, atoi(grouped)))
    # float through the locale-aware str(), then parsed back
    plain = str(3.14)
    print("%s is %s" % (plain, atof(plain)))
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 189 | |
| 190 | ### Locale name aliasing engine |
| 191 | |
| 192 | # Author: Marc-Andre Lemburg, mal@lemburg.com |
Fredrik Lundh | 37a0982 | 2002-10-19 20:19:10 +0000 | [diff] [blame] | 193 | # Various tweaks by Fredrik Lundh <fredrik@pythonware.com> |
Fredrik Lundh | 6c86b99 | 2000-07-09 17:12:58 +0000 | [diff] [blame] | 194 | |
# Keep a reference to the low-level setlocale() (the one imported from
# _locale, or the 'C' locale emulation defined above) under a private
# name, because the public name setlocale is rebound further down to a
# wrapper that also accepts locale tuples.
_setlocale = setlocale
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 198 | |
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name, formatted for use with setlocale().

        The name is lowercased and looked up in locale_alias, first
        together with its encoding and then by language name alone.
        In the second case an encoding given in the name is mapped
        through encoding_alias (unknown encodings are kept as given);
        without one, the default encoding of the alias entry is used.

        If neither lookup succeeds, the original name is returned
        unchanged.

    """
    # ':' is sometimes used as encoding delimiter; treat it like '.'
    fullname = localename.lower().replace(':', '.')
    parts = fullname.split('.')
    if len(parts) > 1:
        # only the first two components are significant
        langname, encoding = parts[0], parts[1]
        fullname = langname + '.' + encoding
    else:
        langname, encoding = fullname, ''

    # First lookup: fullname (possibly with encoding)
    exact = locale_alias.get(fullname, None)
    if exact is not None:
        return exact

    # Second try: langname (without encoding)
    base = locale_alias.get(langname, None)
    if base is None:
        return localename

    if '.' in base:
        langname, defenc = base.split('.')
    else:
        langname, defenc = base, ''

    if encoding:
        encoding = encoding_alias.get(encoding, encoding)
    else:
        encoding = defenc

    if not encoding:
        return langname
    return langname + '.' + encoding
| 251 | |
def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        A locale modifier ('@...') is split off the normalized code.
        For the modifier 'euro' on a code without encoding, the
        encoding 'iso-8859-15' is reported; other modifiers are
        ignored.

        The language code corresponds to RFC 1766. code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)
    # Look for the modifier in the normalized code, not in the raw name:
    # normalization may already have consumed it (the encoding alias
    # 'utf8@ucs4' does), and unpacking the split would then fail.
    if '@' in code:
        # Deal with locale modifiers; split only at the first '@'
        code, modifier = code.split('@', 1)
        if modifier == 'euro' and '.' not in code:
            # Assume Latin-9 for @euro locales. This is bogus,
            # since some systems may use other encodings for these
            # locales. Also, we ignore other modifiers.
            return code, 'iso-8859-15'

    if '.' in code:
        # return a tuple, as documented, rather than the list from split()
        return tuple(code.split('.')[:2])
    elif code == 'C':
        return None, None
    raise ValueError('unknown locale: %s' % localename)
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 281 | |
| 282 | def _build_localename(localetuple): |
| 283 | |
| 284 | """ Builds a locale code from the given tuple (language code, |
| 285 | encoding). |
| 286 | |
| 287 | No aliasing or normalizing takes place. |
| 288 | |
| 289 | """ |
| 290 | language, encoding = localetuple |
| 291 | if language is None: |
| 292 | language = 'C' |
| 293 | if encoding is None: |
| 294 | return language |
| 295 | else: |
| 296 | return language + '.' + encoding |
Fredrik Lundh | 6c86b99 | 2000-07-09 17:12:58 +0000 | [diff] [blame] | 297 | |
def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        If the _locale module offers _getdefaultlocale(), its answer
        is used. Otherwise the environment is consulted: to maintain
        compatibility with other platforms, not only the LANG variable
        is tested, but a list of variables given as envvars parameter.
        The first one found with a non-empty value is used; of a
        LANGUAGE value, which may list several locales separated by
        ':', only the first entry is used. If none is set, 'C' is
        assumed. envvars defaults to the search path used in GNU
        gettext; it must always contain the variable name 'LANG'.

        Except for the code 'C', the language code corresponds to RFC
        1766. code and encoding can be None in case the values cannot
        be determined.

        A ValueError is raised if the locale name taken from the
        environment cannot be parsed.

    """

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        if sys.platform == "win32" and code and code[:2] == "0x":
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    lookup = os.environ.get
    for variable in envvars:
        localename = lookup(variable, None)
        # A variable set to the empty string counts as unset; it must
        # not be handed to the parser as if it were a locale name.
        if localename:
            if variable == 'LANGUAGE':
                # LANGUAGE holds a priority list; use its first entry
                localename = localename.split(':')[0]
            break
    else:
        localename = 'C'
    return _parse_localename(localename)
| 347 | |
Fredrik Lundh | 6c86b99 | 2000-07-09 17:12:58 +0000 | [diff] [blame] | 348 | |
def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* values except LC_ALL. It
        defaults to LC_CTYPE. For LC_ALL a TypeError is raised when
        the current setting is a composite (contains ';').

        Except for the code 'C', the language code corresponds to RFC
        1766. code and encoding can be None in case the values cannot
        be determined.

    """
    # query only: the low-level setlocale() is called without a value
    current = _setlocale(category)
    if category == LC_ALL:
        if ';' in current:
            raise TypeError('category LC_ALL is not supported')
    return _parse_localename(current)
| 366 | |
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, a locale tuple (language code, encoding), or None.

        Locale tuples are converted to strings using the locale
        aliasing engine.  Locale strings are passed directly to the
        C lib.

        category may be given as one of the LC_* values.

        Returns whatever the low-level setlocale() returns.

    """
    # The name str is rebound to a function in this module, so the
    # string types are obtained without it.  Unicode strings (where the
    # interpreter has them) and str subclasses are locale names too and
    # must not be mistaken for locale tuples.
    try:
        string_types = (type(""), unicode)
    except NameError:
        string_types = (type(""),)
    if locale and not isinstance(locale, string_types):
        # convert to string
        locale = normalize(_build_localename(locale))
    return _setlocale(category, locale)
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 382 | |
def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        The default setting is whatever getdefaultlocale() reports;
        it is passed to the low-level setlocale() without going
        through the alias engine again. category defaults to LC_ALL.

    """
    default = getdefaultlocale()
    _setlocale(category, _build_localename(default))
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 392 | |
# getpreferredencoding() is defined in one of three ways, chosen once at
# import time depending on the platform and on whether CODESET exists.
if sys.platform in ('win32', 'darwin', 'mac'):
    # On Win32, this will return the ANSI code page
    # On the Mac, it should return the system encoding;
    # it might return "ascii" instead
    def getpreferredencoding(do_setlocale = True):
        """Return the charset that the user is likely using.

        do_setlocale is accepted for interface compatibility and is
        not used by this variant."""
        import _locale
        return _locale._getdefaultlocale()[1]
else:
    # On Unix, if CODESET is available, use that.
    try:
        CODESET
    except NameError:
        # Fall back to parsing environment variables :-(
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            by looking at environment variables.

            do_setlocale is accepted for interface compatibility and
            is not used by this variant."""
            return getdefaultlocale()[1]
    else:
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            according to the system configuration.

            If do_setlocale is true, LC_CTYPE is temporarily switched
            to the user's default locale for the query and restored
            afterwards; otherwise the current LC_CTYPE setting is
            queried as it is."""
            if not do_setlocale:
                return nl_langinfo(CODESET)
            oldloc = setlocale(LC_CTYPE)
            setlocale(LC_CTYPE, "")
            try:
                return nl_langinfo(CODESET)
            finally:
                # restore the previous setting even if the query fails
                setlocale(LC_CTYPE, oldloc)
Tim Peters | 230a60c | 2002-11-09 05:08:07 +0000 | [diff] [blame] | 423 | |
Martin v. Löwis | f0a4668 | 2002-11-03 17:20:12 +0000 | [diff] [blame] | 424 | |
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 425 | ### Database |
| 426 | # |
| 427 | # The following data was extracted from the locale.alias file which |
| 428 | # comes with X11 and then hand edited removing the explicit encoding |
| 429 | # definitions and adding some more aliases. The file is usually |
| 430 | # available as /usr/lib/X11/locale/locale.alias. |
Fredrik Lundh | 6c86b99 | 2000-07-09 17:12:58 +0000 | [diff] [blame] | 431 | # |
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 432 | |
| 433 | # |
| 434 | # The encoding_alias table maps lowercase encoding alias names to C |
| 435 | # locale encoding names (case-sensitive). |
| 436 | # |
encoding_alias = {
    # plain C
    '437': 'C',
    'c': 'C',

    # ISO 8859-1
    '8859': 'ISO8859-1',
    '88591': 'ISO8859-1',
    'ascii': 'ISO8859-1',
    'en': 'ISO8859-1',
    'iso8859': 'ISO8859-1',
    'iso88591': 'ISO8859-1',
    'iso_8859-1': 'ISO8859-1',

    # ISO 8859-15
    '885915': 'ISO8859-15',
    'iso885915': 'ISO8859-15',
    'iso_8859-15': 'ISO8859-15',

    # ISO 8859-2
    'iso8859-2': 'ISO8859-2',
    'iso88592': 'ISO8859-2',
    'iso_8859-2': 'ISO8859-2',

    # other ISO 8859 parts
    'iso88595': 'ISO8859-5',
    'iso88596': 'ISO8859-6',
    'iso88597': 'ISO8859-7',
    'iso88598': 'ISO8859-8',
    'iso88599': 'ISO8859-9',

    # Japanese
    'iso-2022-jp': 'JIS7',
    'jis': 'JIS7',
    'jis7': 'JIS7',
    'sjis': 'SJIS',
    'ajec': 'eucJP',
    'eucjp': 'eucJP',
    'ujis': 'eucJP',

    # Thai
    'tis620': 'TACTIS',

    # UTF-8
    'utf-8': 'utf',
    'utf8': 'utf',
    'utf8@ucs4': 'utf',
}
| 470 | |
Fredrik Lundh | 6c86b99 | 2000-07-09 17:12:58 +0000 | [diff] [blame] | 471 | # |
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 472 | # The locale_alias table maps lowercase alias names to C locale names |
| 473 | # (case-sensitive). Encodings are always separated from the locale |
| 474 | # name using a dot ('.'); they should only be given in case the |
| 475 | # language name is needed to interpret the given encoding alias |
| 476 | # correctly (CJK codes often have this need). |
| 477 | # |
| 478 | locale_alias = { |
| 479 | 'american': 'en_US.ISO8859-1', |
| 480 | 'ar': 'ar_AA.ISO8859-6', |
| 481 | 'ar_aa': 'ar_AA.ISO8859-6', |
| 482 | 'ar_sa': 'ar_SA.ISO8859-6', |
| 483 | 'arabic': 'ar_AA.ISO8859-6', |
| 484 | 'bg': 'bg_BG.ISO8859-5', |
| 485 | 'bg_bg': 'bg_BG.ISO8859-5', |
| 486 | 'bulgarian': 'bg_BG.ISO8859-5', |
| 487 | 'c-french': 'fr_CA.ISO8859-1', |
| 488 | 'c': 'C', |
| 489 | 'c_c': 'C', |
| 490 | 'cextend': 'en_US.ISO8859-1', |
| 491 | 'chinese-s': 'zh_CN.eucCN', |
| 492 | 'chinese-t': 'zh_TW.eucTW', |
| 493 | 'croatian': 'hr_HR.ISO8859-2', |
| 494 | 'cs': 'cs_CZ.ISO8859-2', |
| 495 | 'cs_cs': 'cs_CZ.ISO8859-2', |
| 496 | 'cs_cz': 'cs_CZ.ISO8859-2', |
| 497 | 'cz': 'cz_CZ.ISO8859-2', |
| 498 | 'cz_cz': 'cz_CZ.ISO8859-2', |
| 499 | 'czech': 'cs_CS.ISO8859-2', |
| 500 | 'da': 'da_DK.ISO8859-1', |
| 501 | 'da_dk': 'da_DK.ISO8859-1', |
| 502 | 'danish': 'da_DK.ISO8859-1', |
| 503 | 'de': 'de_DE.ISO8859-1', |
| 504 | 'de_at': 'de_AT.ISO8859-1', |
| 505 | 'de_ch': 'de_CH.ISO8859-1', |
| 506 | 'de_de': 'de_DE.ISO8859-1', |
| 507 | 'dutch': 'nl_BE.ISO8859-1', |
| 508 | 'ee': 'ee_EE.ISO8859-4', |
| 509 | 'el': 'el_GR.ISO8859-7', |
| 510 | 'el_gr': 'el_GR.ISO8859-7', |
| 511 | 'en': 'en_US.ISO8859-1', |
| 512 | 'en_au': 'en_AU.ISO8859-1', |
| 513 | 'en_ca': 'en_CA.ISO8859-1', |
| 514 | 'en_gb': 'en_GB.ISO8859-1', |
| 515 | 'en_ie': 'en_IE.ISO8859-1', |
| 516 | 'en_nz': 'en_NZ.ISO8859-1', |
| 517 | 'en_uk': 'en_GB.ISO8859-1', |
| 518 | 'en_us': 'en_US.ISO8859-1', |
| 519 | 'eng_gb': 'en_GB.ISO8859-1', |
| 520 | 'english': 'en_EN.ISO8859-1', |
| 521 | 'english_uk': 'en_GB.ISO8859-1', |
| 522 | 'english_united-states': 'en_US.ISO8859-1', |
| 523 | 'english_us': 'en_US.ISO8859-1', |
| 524 | 'es': 'es_ES.ISO8859-1', |
| 525 | 'es_ar': 'es_AR.ISO8859-1', |
| 526 | 'es_bo': 'es_BO.ISO8859-1', |
| 527 | 'es_cl': 'es_CL.ISO8859-1', |
| 528 | 'es_co': 'es_CO.ISO8859-1', |
| 529 | 'es_cr': 'es_CR.ISO8859-1', |
| 530 | 'es_ec': 'es_EC.ISO8859-1', |
| 531 | 'es_es': 'es_ES.ISO8859-1', |
| 532 | 'es_gt': 'es_GT.ISO8859-1', |
| 533 | 'es_mx': 'es_MX.ISO8859-1', |
| 534 | 'es_ni': 'es_NI.ISO8859-1', |
| 535 | 'es_pa': 'es_PA.ISO8859-1', |
| 536 | 'es_pe': 'es_PE.ISO8859-1', |
| 537 | 'es_py': 'es_PY.ISO8859-1', |
| 538 | 'es_sv': 'es_SV.ISO8859-1', |
| 539 | 'es_uy': 'es_UY.ISO8859-1', |
| 540 | 'es_ve': 'es_VE.ISO8859-1', |
| 541 | 'et': 'et_EE.ISO8859-4', |
| 542 | 'et_ee': 'et_EE.ISO8859-4', |
| 543 | 'fi': 'fi_FI.ISO8859-1', |
| 544 | 'fi_fi': 'fi_FI.ISO8859-1', |
| 545 | 'finnish': 'fi_FI.ISO8859-1', |
| 546 | 'fr': 'fr_FR.ISO8859-1', |
| 547 | 'fr_be': 'fr_BE.ISO8859-1', |
| 548 | 'fr_ca': 'fr_CA.ISO8859-1', |
| 549 | 'fr_ch': 'fr_CH.ISO8859-1', |
| 550 | 'fr_fr': 'fr_FR.ISO8859-1', |
| 551 | 'fre_fr': 'fr_FR.ISO8859-1', |
| 552 | 'french': 'fr_FR.ISO8859-1', |
| 553 | 'french_france': 'fr_FR.ISO8859-1', |
| 554 | 'ger_de': 'de_DE.ISO8859-1', |
| 555 | 'german': 'de_DE.ISO8859-1', |
| 556 | 'german_germany': 'de_DE.ISO8859-1', |
| 557 | 'greek': 'el_GR.ISO8859-7', |
| 558 | 'hebrew': 'iw_IL.ISO8859-8', |
| 559 | 'hr': 'hr_HR.ISO8859-2', |
| 560 | 'hr_hr': 'hr_HR.ISO8859-2', |
| 561 | 'hu': 'hu_HU.ISO8859-2', |
| 562 | 'hu_hu': 'hu_HU.ISO8859-2', |
| 563 | 'hungarian': 'hu_HU.ISO8859-2', |
| 564 | 'icelandic': 'is_IS.ISO8859-1', |
| 565 | 'id': 'id_ID.ISO8859-1', |
| 566 | 'id_id': 'id_ID.ISO8859-1', |
| 567 | 'is': 'is_IS.ISO8859-1', |
| 568 | 'is_is': 'is_IS.ISO8859-1', |
| 569 | 'iso-8859-1': 'en_US.ISO8859-1', |
| 570 | 'iso-8859-15': 'en_US.ISO8859-15', |
| 571 | 'iso8859-1': 'en_US.ISO8859-1', |
| 572 | 'iso8859-15': 'en_US.ISO8859-15', |
| 573 | 'iso_8859_1': 'en_US.ISO8859-1', |
| 574 | 'iso_8859_15': 'en_US.ISO8859-15', |
| 575 | 'it': 'it_IT.ISO8859-1', |
| 576 | 'it_ch': 'it_CH.ISO8859-1', |
| 577 | 'it_it': 'it_IT.ISO8859-1', |
| 578 | 'italian': 'it_IT.ISO8859-1', |
| 579 | 'iw': 'iw_IL.ISO8859-8', |
| 580 | 'iw_il': 'iw_IL.ISO8859-8', |
| 581 | 'ja': 'ja_JP.eucJP', |
| 582 | 'ja.jis': 'ja_JP.JIS7', |
| 583 | 'ja.sjis': 'ja_JP.SJIS', |
| 584 | 'ja_jp': 'ja_JP.eucJP', |
| 585 | 'ja_jp.ajec': 'ja_JP.eucJP', |
| 586 | 'ja_jp.euc': 'ja_JP.eucJP', |
| 587 | 'ja_jp.eucjp': 'ja_JP.eucJP', |
| 588 | 'ja_jp.iso-2022-jp': 'ja_JP.JIS7', |
| 589 | 'ja_jp.jis': 'ja_JP.JIS7', |
| 590 | 'ja_jp.jis7': 'ja_JP.JIS7', |
| 591 | 'ja_jp.mscode': 'ja_JP.SJIS', |
| 592 | 'ja_jp.sjis': 'ja_JP.SJIS', |
| 593 | 'ja_jp.ujis': 'ja_JP.eucJP', |
| 594 | 'japan': 'ja_JP.eucJP', |
| 595 | 'japanese': 'ja_JP.SJIS', |
| 596 | 'japanese-euc': 'ja_JP.eucJP', |
| 597 | 'japanese.euc': 'ja_JP.eucJP', |
| 598 | 'jp_jp': 'ja_JP.eucJP', |
| 599 | 'ko': 'ko_KR.eucKR', |
| 600 | 'ko_kr': 'ko_KR.eucKR', |
| 601 | 'ko_kr.euc': 'ko_KR.eucKR', |
| 602 | 'korean': 'ko_KR.eucKR', |
| 603 | 'lt': 'lt_LT.ISO8859-4', |
| 604 | 'lv': 'lv_LV.ISO8859-4', |
| 605 | 'mk': 'mk_MK.ISO8859-5', |
| 606 | 'mk_mk': 'mk_MK.ISO8859-5', |
| 607 | 'nl': 'nl_NL.ISO8859-1', |
| 608 | 'nl_be': 'nl_BE.ISO8859-1', |
| 609 | 'nl_nl': 'nl_NL.ISO8859-1', |
| 610 | 'no': 'no_NO.ISO8859-1', |
| 611 | 'no_no': 'no_NO.ISO8859-1', |
| 612 | 'norwegian': 'no_NO.ISO8859-1', |
| 613 | 'pl': 'pl_PL.ISO8859-2', |
| 614 | 'pl_pl': 'pl_PL.ISO8859-2', |
| 615 | 'polish': 'pl_PL.ISO8859-2', |
| 616 | 'portuguese': 'pt_PT.ISO8859-1', |
| 617 | 'portuguese_brazil': 'pt_BR.ISO8859-1', |
| 618 | 'posix': 'C', |
| 619 | 'posix-utf2': 'C', |
| 620 | 'pt': 'pt_PT.ISO8859-1', |
| 621 | 'pt_br': 'pt_BR.ISO8859-1', |
| 622 | 'pt_pt': 'pt_PT.ISO8859-1', |
| 623 | 'ro': 'ro_RO.ISO8859-2', |
| 624 | 'ro_ro': 'ro_RO.ISO8859-2', |
| 625 | 'ru': 'ru_RU.ISO8859-5', |
| 626 | 'ru_ru': 'ru_RU.ISO8859-5', |
| 627 | 'rumanian': 'ro_RO.ISO8859-2', |
| 628 | 'russian': 'ru_RU.ISO8859-5', |
| 629 | 'serbocroatian': 'sh_YU.ISO8859-2', |
| 630 | 'sh': 'sh_YU.ISO8859-2', |
| 631 | 'sh_hr': 'sh_HR.ISO8859-2', |
| 632 | 'sh_sp': 'sh_YU.ISO8859-2', |
| 633 | 'sh_yu': 'sh_YU.ISO8859-2', |
| 634 | 'sk': 'sk_SK.ISO8859-2', |
| 635 | 'sk_sk': 'sk_SK.ISO8859-2', |
| 636 | 'sl': 'sl_CS.ISO8859-2', |
| 637 | 'sl_cs': 'sl_CS.ISO8859-2', |
| 638 | 'sl_si': 'sl_SI.ISO8859-2', |
| 639 | 'slovak': 'sk_SK.ISO8859-2', |
| 640 | 'slovene': 'sl_CS.ISO8859-2', |
| 641 | 'sp': 'sp_YU.ISO8859-5', |
| 642 | 'sp_yu': 'sp_YU.ISO8859-5', |
| 643 | 'spanish': 'es_ES.ISO8859-1', |
| 644 | 'spanish_spain': 'es_ES.ISO8859-1', |
| 645 | 'sr_sp': 'sr_SP.ISO8859-2', |
| 646 | 'sv': 'sv_SE.ISO8859-1', |
| 647 | 'sv_se': 'sv_SE.ISO8859-1', |
| 648 | 'swedish': 'sv_SE.ISO8859-1', |
| 649 | 'th_th': 'th_TH.TACTIS', |
| 650 | 'tr': 'tr_TR.ISO8859-9', |
| 651 | 'tr_tr': 'tr_TR.ISO8859-9', |
| 652 | 'turkish': 'tr_TR.ISO8859-9', |
| 653 | 'univ': 'en_US.utf', |
| 654 | 'universal': 'en_US.utf', |
| 655 | 'zh': 'zh_CN.eucCN', |
| 656 | 'zh_cn': 'zh_CN.eucCN', |
| 657 | 'zh_cn.big5': 'zh_TW.eucTW', |
| 658 | 'zh_cn.euc': 'zh_CN.eucCN', |
| 659 | 'zh_tw': 'zh_TW.eucTW', |
| 660 | 'zh_tw.euc': 'zh_TW.eucTW', |
| 661 | } |
| 662 | |
Fredrik Lundh | 6c86b99 | 2000-07-09 17:12:58 +0000 | [diff] [blame] | 663 | # |
| 664 | # this maps windows language identifiers (as used on Windows 95 and |
| 665 | # earlier) to locale strings. |
| 666 | # |
# NOTE: this mapping is incomplete.  If your language is missing, please
# submit a bug report to the Python bug manager, which you can find via:
#     http://www.python.org/dev/
# Make sure you include the missing language identifier and the suggested
# locale code.
| 672 | # |
Fredrik Lundh | 6c86b99 | 2000-07-09 17:12:58 +0000 | [diff] [blame] | 673 | |
# Maps Windows language identifiers (LCIDs) to "language_COUNTRY" locale
# codes.  The country part must be an ISO 3166 code so the result can be
# resolved by the alias table: the United Kingdom is "GB" (not "UK") and
# Japan is "JP" (not "JA").
windows_locale = {
    0x0404: "zh_TW", # Chinese (Taiwan)
    0x0804: "zh_CN", # Chinese (PRC)
    0x0406: "da_DK", # Danish
    0x0413: "nl_NL", # Dutch (Netherlands)
    0x0409: "en_US", # English (United States)
    0x0809: "en_GB", # English (United Kingdom)
    0x0c09: "en_AU", # English (Australian)
    0x1009: "en_CA", # English (Canadian)
    0x1409: "en_NZ", # English (New Zealand)
    0x1809: "en_IE", # English (Ireland)
    0x1c09: "en_ZA", # English (South Africa)
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French (Standard)
    0x080c: "fr_BE", # French (Belgian)
    0x0c0c: "fr_CA", # French (Canadian)
    0x100c: "fr_CH", # French (Switzerland)
    0x0407: "de_DE", # German (Standard)
    0x0408: "el_GR", # Greek
    0x040d: "iw_IL", # Hebrew
    0x040f: "is_IS", # Icelandic
    0x0410: "it_IT", # Italian (Standard)
    0x0411: "ja_JP", # Japanese
    0x0414: "no_NO", # Norwegian (Bokmal)
    0x0816: "pt_PT", # Portuguese (Standard)
    0x0c0a: "es_ES", # Spanish (Modern Sort)
    0x0441: "sw_KE", # Swahili (Kenya)
    0x041d: "sv_SE", # Swedish
    0x081d: "sv_FI", # Swedish (Finland)
    0x041f: "tr_TR", # Turkish
}
| 705 | |
def _print_category_settings(categories):

    """ Print the language and encoding that getlocale() reports for
        each entry of categories, a dictionary mapping LC_* names to
        their values.
    """
    # Each print() call gets exactly one argument, so the output is the
    # same whether print is a statement or a function.
    for name, category in categories.items():
        print('%s %s' % (name, '...'))
        lang, enc = getlocale(category)
        print('%s %s' % (' Language: ', lang or '(undefined)'))
        print('%s %s' % (' Encoding: ', enc or '(undefined)'))
        print('')

def _print_locale():

    """ Test function.

        Prints the defaults found by getdefaultlocale(), followed by
        the per-category settings at startup, after resetlocale() and
        after setlocale(LC_ALL, "").
    """
    # Collect every LC_* constant defined in this module; LC_ALL is
    # left out because it is not queried as a category of its own.
    categories = {}
    for k, v in globals().items():
        if k[:3] == 'LC_' and k != 'LC_ALL':
            categories[k] = v

    print('Locale defaults as determined by getdefaultlocale():')
    print('-'*72)
    lang, enc = getdefaultlocale()
    print('%s %s' % ('Language: ', lang or '(undefined)'))
    print('%s %s' % ('Encoding: ', enc or '(undefined)'))
    print('')

    print('Locale settings on startup:')
    print('-'*72)
    _print_category_settings(categories)

    print('')
    print('Locale settings after calling resetlocale():')
    print('-'*72)
    resetlocale()
    _print_category_settings(categories)

    try:
        setlocale(LC_ALL, "")
    except Error:
        # Only an unsupported locale is reported here; anything else
        # (including KeyboardInterrupt) is allowed to propagate.
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print('')
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-'*72)
        _print_category_settings(categories)
Fredrik Lundh | 6c86b99 | 2000-07-09 17:12:58 +0000 | [diff] [blame] | 761 | |
Marc-André Lemburg | 5431bc3 | 2000-06-07 09:11:40 +0000 | [diff] [blame] | 762 | ### |
Guido van Rossum | eef1d4e | 1997-11-19 19:01:43 +0000 | [diff] [blame] | 763 | |
# LC_MESSAGES is non-standard, so it may not have been defined above.
# Export it through __all__ only when the name actually exists.
if 'LC_MESSAGES' in globals():
    __all__.append("LC_MESSAGES")
| 770 | |
if __name__ == '__main__':
    # Script entry point: show the locale aliasing report, then run the
    # number formatting self-test.
    print('Locale aliasing:')
    print('')
    _print_locale()
    print('')
    print('Number formatting:')
    print('')
    _test()