blob: c216089de96b7dcdc0e5034d436e27f7aece3938 [file] [log] [blame]
Barry Warsaw95be23d2000-08-25 19:13:37 +00001"""Internationalization and localization support.
2
3This module provides internationalization (I18N) and localization (L10N)
4support for your Python programs by providing an interface to the GNU gettext
5message catalog library.
6
7I18N refers to the operation by which a program is made aware of multiple
8languages. L10N refers to the adaptation of your program, once
9internationalized, to the local language and cultural habits. In order to
10provide multilingual messages for your Python programs, you need to take the
11following steps:
12
13 - prepare your program by specially marking translatable strings
14 - run a suite of tools over your marked program files to generate raw
15 message catalogs
16 - create language specific translations of the message catalogs
17 - use this module so that message strings are properly translated
18
19In order to prepare your program for I18N, you need to look at all the strings
20in your program. Any string that needs to be translated should be marked by
21wrapping it in _('...') -- i.e. a call to the function `_'. For example:
22
23 filename = 'mylog.txt'
24 message = _('writing a log message')
25 fp = open(filename, 'w')
26 fp.write(message)
27 fp.close()
28
29In this example, the string `writing a log message' is marked as a candidate
30for translation, while the strings `mylog.txt' and `w' are not.
31
32The GNU gettext package provides a tool, called xgettext, that scans C and C++
33source code looking for these specially marked strings. xgettext generates
34what are called `.pot' files, essentially structured human readable files
35which contain every marked string in the source code. These .pot files are
36copied and handed over to translators who write language-specific versions for
37every supported language.
38
39For I18N Python programs however, xgettext won't work; it doesn't understand
40the myriad of string types supported by Python. The standard Python
41distribution provides a tool called pygettext that does though (found in the
42Tools/i18n directory). This is a command line script that supports a similar
43interface as xgettext; see its documentation for details. Once you've used
44pygettext to create your .pot files, you can use the standard GNU gettext
45tools to generate your machine-readable .mo files, which are what's used by
46this module.
47
48In the simple case, to use this module then, you need only add the following
49bit of code to the main driver file of your application:
50
51 import gettext
52 gettext.install()
53
54This sets everything up so that your _('...') function calls Just Work. In
55other words, it installs `_' in the builtins namespace for convenience. You
56can skip this step and do it manually by the equivalent code:
57
58 import gettext
59 import __builtin__
60 __builtin__.__dict__['_'] = gettext.gettext
61
62Once you've done this, you probably want to call bindtextdomain() and
63textdomain() to get the domain set up properly. Again, for convenience, you
64can pass the domain and localedir to install to set everything up in one fell
65swoop:
66
67 import gettext
68 gettext.install('mydomain', '/my/locale/dir')
69
70If your program needs to support many languages at the same time, you will
71want to create Translations objects explicitly, like so:
72
73 import gettext
74 gettext.install()
75
76 lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo', 'rb'))
77 lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo', 'rb'))
78 lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo', 'rb'))
79
80 gettext.set(lang1)
81 # all _() will now translate to language 1
82 gettext.set(lang2)
83 # all _() will now translate to language 2
84
85Currently, only GNU gettext format binary .mo files are supported.
86
87"""
88
Barry Warsawfa488ec2000-08-25 20:26:43 +000089# This module represents the integration of work, contributions, feedback, and
90# suggestions from the following people:
Barry Warsaw95be23d2000-08-25 19:13:37 +000091#
92# Martin von Loewis, who wrote the initial implementation of the underlying
93# C-based libintlmodule (later renamed _gettext), along with a skeletal
94# gettext.py implementation.
95#
96# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
97# which also included a pure-Python implementation to read .mo files if
98# intlmodule wasn't available.
99#
100# James Henstridge, who also wrote a gettext.py module, which has some
101# interesting, but currently unsupported experimental features: the notion of
102# a Catalog class and instances, and the ability to add to a catalog file via
103# a Python API.
104#
105# Barry Warsaw integrated these modules, wrote the .install() API and code,
106# and conformed all C and Python code to Python's coding standards.
107
108import os
109import sys
110import struct
111from UserDict import UserDict
112
113
114
115# globals
116_translations = {}
117_current_translation = None
118_current_domain = 'messages'
119
120# Domain to directory mapping, for use by bindtextdomain()
121_localedirs = {}
122
123
124
Barry Warsawfa488ec2000-08-25 20:26:43 +0000125def _expand_lang(locale):
126 from locale import normalize
127 locale = normalize(locale)
128 COMPONENT_CODESET = 1 << 0
129 COMPONENT_TERRITORY = 1 << 1
130 COMPONENT_MODIFIER = 1 << 2
131 # split up the locale into its base components
132 mask = 0
133 pos = locale.find('@')
134 if pos >= 0:
135 modifier = locale[pos:]
136 locale = locale[:pos]
137 mask |= COMPONENT_MODIFIER
138 else:
139 modifier = ''
140 pos = locale.find('.')
141 if pos >= 0:
142 codeset = locale[pos:]
143 locale = locale[:pos]
144 mask |= COMPONENT_CODESET
145 else:
146 codeset = ''
147 pos = locale.find('_')
148 if pos >= 0:
149 territory = locale[pos:]
150 locale = locale[:pos]
151 mask |= COMPONENT_TERRITORY
152 else:
153 territory = ''
154 language = locale
155 ret = []
156 for i in range(mask+1):
157 if not (i & ~mask): # if all components for this combo exist ...
158 val = language
159 if i & COMPONENT_TERRITORY: val += territory
160 if i & COMPONENT_CODESET: val += codeset
161 if i & COMPONENT_MODIFIER: val += modifier
162 ret.append(val)
163 ret.reverse()
164 return ret
165
166
167
class GNUTranslations(UserDict):
    """Message catalog read from a GNU gettext binary .mo file.

    The instance behaves like a dictionary mapping each original message
    to its translation, exactly as the strings are stored in the file.
    Passing None (or nothing) as the file object gives an empty catalog.
    """

    # Magic number of .mo files
    MAGIC = 0x950412de
    # The same magic number as it appears when the file was written with the
    # opposite byte order from the one it is being read with.
    MAGIC_SWAPPED = 0xde120495

    def __init__(self, fp=None):
        """Load the catalog from the open binary file `fp', if given."""
        if fp is None:
            d = {}
        else:
            d = self._parse(fp)
        UserDict.__init__(self, d)

    def _parse(self, fp):
        """Override this method to support alternative .mo formats.

        Reads all of `fp' and returns a dictionary mapping messages to
        translations.  Raises IOError if the data does not start with the
        .mo magic number or if it is truncated or otherwise inconsistent.
        """
        unpack = struct.unpack
        filename = getattr(fp, 'name', '')
        buf = fp.read()
        buflen = len(buf)
        # The part of the header used here is five 32 bit words: magic,
        # version, message count, and the offsets of the two index tables.
        if buflen < 20:
            raise IOError(0, 'File is corrupt', filename)
        # All words are unsigned.  Reading the magic number as a signed
        # value would make it negative, and it could then never compare
        # equal to MAGIC.  The magic number also reveals the byte order.
        magic = unpack('<I', buf[:4])[0]
        if magic == self.MAGIC:
            order = '<'
        elif magic == self.MAGIC_SWAPPED:
            order = '>'
        else:
            raise IOError(0, 'Bad magic number', filename)
        # The version word (bytes 4-8) is not examined.
        msgcount, masteridx, transidx = unpack(order + '3I', buf[8:20])
        # Both index tables hold one 8 byte entry per message and must lie
        # completely inside the file.  This also bounds msgcount before it
        # is used to drive the loop below.
        if (masteridx + 8 * msgcount > buflen or
                transidx + 8 * msgcount > buflen):
            raise IOError(0, 'File is corrupt', filename)
        #
        # Now put all messages from the .mo file buffer into the catalog
        # dictionary.  Each index entry is a (length, offset) pair.
        entry = order + '2I'
        catalog = {}
        for index in range(msgcount):
            mpos = masteridx + 8 * index
            tpos = transidx + 8 * index
            mlen, mstart = unpack(entry, buf[mpos:mpos + 8])
            tlen, tstart = unpack(entry, buf[tpos:tpos + 8])
            mend = mstart + mlen
            tend = tstart + tlen
            if mend < buflen and tend < buflen:
                catalog[buf[mstart:mend]] = buf[tstart:tend]
            else:
                raise IOError(0, 'File is corrupt', filename)
        return catalog



# By default, use GNU gettext format .mo files
Translations = GNUTranslations
213
# Locate a .mo file using the gettext strategy
def _find(localedir=None, languages=None, domain=None):
    """Return the catalog of the first language that has one, else {}.

    localedir -- directory holding the <lang>/LC_MESSAGES/<domain>.mo tree;
                 defaults to the directory registered for the domain in
                 _localedirs, or <sys.prefix>/share/locale.
    languages -- language names in order of preference; defaults to the
                 colon separated value of the first non-empty variable among
                 LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
    domain    -- message domain; defaults to the current global domain.

    Each language is expanded with _expand_lang() and the candidates are
    tried in order.  Reaching `C' stops the search.  Catalogs are cached in
    _translations by file name, and files that cannot be opened or parsed
    are skipped.
    """
    # Get some reasonable defaults for arguments that were not supplied
    if domain is None:
        domain = _current_domain
    if localedir is None:
        localedir = _localedirs.get(
            domain,
            # TBD: The default localedir is actually system dependent.  I
            # don't know of a good platform-consistent and portable way to
            # default it, so instead, we'll just use sys.prefix.  Most
            # programs should be calling bindtextdomain() or such explicitly
            # anyway.
            os.path.join(sys.prefix, 'share', 'locale'))
    if languages is None:
        languages = []
        for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            val = os.environ.get(envar)
            if val:
                languages = val.split(':')
                break
    else:
        # Work on a copy so the caller's sequence is never modified.
        languages = list(languages)
    if 'C' not in languages:
        languages.append('C')
    # Now normalize and expand the languages.  The order is significant:
    # it carries both the user's preference and the most-specific-first
    # ordering produced by _expand_lang(), so duplicates are dropped
    # without disturbing it.
    candidates = []
    for lang in languages:
        for nelang in _expand_lang(lang):
            if nelang not in candidates:
                candidates.append(nelang)
    # select a language
    for lang in candidates:
        if lang == 'C':
            break
        mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
        # See if it's in the cache.  An empty catalog is still a hit, so
        # test for None rather than for truth.
        mo = _translations.get(mofile)
        if mo is not None:
            return mo
        t = None
        try:
            fp = open(mofile, 'rb')
            try:
                t = Translations(fp)
            finally:
                fp.close()
        except (IOError, struct.error):
            # Missing, unreadable or malformed catalog: deliberately fall
            # through to the next candidate language.
            pass
        if t is not None:
            _translations[mofile] = t
            return t
    return {}
267
268
269
def bindtextdomain(domain=None, localedir=None):
    """Set or query the locale directory bound to a domain.

    With a localedir, record it for the domain and return it.  Without
    one, return the directory recorded for the domain, falling back to the
    one recorded for `C', or None if neither exists.  Returns None when no
    domain is given.
    """
    if domain is None:
        return None
    if localedir is not None:
        _localedirs[domain] = localedir
        return localedir
    fallback = _localedirs.get('C')
    return _localedirs.get(domain, fallback)
279
280
def textdomain(domain=None):
    """Return the current global domain, first changing it if one is given."""
    global _current_domain
    if domain is not None:
        _current_domain = domain
    return _current_domain
289
290
def gettext(message):
    """Return localized version of a message.

    If a translation object has been selected with set(), the message is
    looked up there.  Otherwise the catalog is located with _find() using
    the current domain and the language environment.  A message without a
    translation is returned unchanged.
    """
    catalog = _current_translation
    if catalog is None:
        catalog = _find()
    return catalog.get(message, message)
294
295
def dgettext(domain, message):
    """Return the translation of a message taken from the given domain.

    The message itself is returned when the domain's catalog has no entry
    for it.
    """
    catalog = _find(domain=domain)
    return catalog.get(message, message)
299
300
Barry Warsaw95be23d2000-08-25 19:13:37 +0000301
# A higher level API
def set(translation):
    """Record `translation' as the module's current translation object."""
    globals()['_current_translation'] = translation
306
307
def get():
    """Return the module's current translation object."""
    return _current_translation
311
312
def install(domain=None, localedir=None):
    """Install gettext() as `_' in the builtins namespace.

    When a domain is given, it is additionally passed with localedir to
    bindtextdomain() and made the current domain with textdomain().
    """
    import __builtin__
    setattr(__builtin__, '_', gettext)
    if domain is None:
        return
    bindtextdomain(domain, localedir)
    textdomain(domain)