blob: 74c7c3809a38a0e43876a38241e5a57786700048 [file] [log] [blame]
Barry Warsaw95be23d2000-08-25 19:13:37 +00001"""Internationalization and localization support.
2
3This module provides internationalization (I18N) and localization (L10N)
4support for your Python programs by providing an interface to the GNU gettext
5message catalog library.
6
7I18N refers to the operation by which a program is made aware of multiple
8languages. L10N refers to the adaptation of your program, once
9internationalized, to the local language and cultural habits. In order to
10provide multilingual messages for your Python programs, you need to take the
11following steps:
12
13 - prepare your program by specially marking translatable strings
14 - run a suite of tools over your marked program files to generate raw
15 message catalogs
16 - create language specific translations of the message catalogs
17 - use this module so that message strings are properly translated
18
19In order to prepare your program for I18N, you need to look at all the strings
20in your program. Any string that needs to be translated should be marked by
21wrapping it in _('...') -- i.e. a call to the function `_'. For example:
22
23 filename = 'mylog.txt'
24 message = _('writing a log message')
25 fp = open(filename, 'w')
26 fp.write(message)
27 fp.close()
28
29In this example, the string `writing a log message' is marked as a candidate
30for translation, while the strings `mylog.txt' and `w' are not.
31
32The GNU gettext package provides a tool, called xgettext, that scans C and C++
33source code looking for these specially marked strings. xgettext generates
34what are called `.pot' files, essentially structured human readable files
35which contain every marked string in the source code. These .pot files are
36copied and handed over to translators who write language-specific versions for
37every supported language.
38
39For I18N Python programs however, xgettext won't work; it doesn't understand
40the myriad of string types supported by Python. The standard Python
41distribution provides a tool called pygettext that does though (found in the
42Tools/i18n directory). This is a command line script that supports a similar
43interface as xgettext; see its documentation for details. Once you've used
44pygettext to create your .pot files, you can use the standard GNU gettext
45tools to generate your machine-readable .mo files, which are what's used by
46this module.
47
48In the simple case, to use this module then, you need only add the following
49bit of code to the main driver file of your application:
50
51 import gettext
52 gettext.install()
53
54This sets everything up so that your _('...') function calls Just Work. In
55other words, it installs `_' in the builtins namespace for convenience. You
56can skip this step and do it manually by the equivalent code:
57
58 import gettext
59 import __builtin__
60 __builtin__.__dict__['_'] = gettext.gettext
61
62Once you've done this, you probably want to call bindtextdomain() and
63textdomain() to get the domain set up properly. Again, for convenience, you
64can pass the domain and localedir to install to set everything up in one fell
65swoop:
66
67 import gettext
68 gettext.install('mydomain', '/my/locale/dir')
69
70If your program needs to support many languages at the same time, you will
71want to create Translation objects explicitly, like so:
72
73 import gettext
74 gettext.install()
75
76 lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo', 'rb'))
77 lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo', 'rb'))
78 lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo', 'rb'))
79
80 gettext.set(lang1)
81 # all _() will now translate to language 1
82 gettext.set(lang2)
83 # all _() will now translate to language 2
84
85Currently, only GNU gettext format binary .mo files are supported.
86
87"""
88
89# This module represents the integration of work from the following authors:
90#
91# Martin von Loewis, who wrote the initial implementation of the underlying
92# C-based libintlmodule (later renamed _gettext), along with a skeletal
93# gettext.py implementation.
94#
95# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
96# which also included a pure-Python implementation to read .mo files if
97# intlmodule wasn't available.
98#
99# James Henstridge, who also wrote a gettext.py module, which has some
100# interesting, but currently unsupported experimental features: the notion of
101# a Catalog class and instances, and the ability to add to a catalog file via
102# a Python API.
103#
104# Barry Warsaw integrated these modules, wrote the .install() API and code,
105# and conformed all C and Python code to Python's coding standards.
106
107import os
108import sys
109import struct
110from UserDict import UserDict
111
112
113
# Module-level state shared by the functions below.

# Cache of catalogs already loaded by _find(), keyed by .mo file path.
_translations = {}

# Domain to directory mapping, for use by bindtextdomain()
_localedirs = {}

# Domain used when a lookup does not name one; see textdomain().
_current_domain = 'messages'

# Translation object stored by set() and handed back by get().
_current_translation = None
121
122
123
class GNUTranslations(UserDict):
    """Message catalog read from a GNU gettext binary .mo file.

    The catalog is a dictionary-like object mapping each original message
    to its translation; both are stored exactly as the raw strings found in
    the file.  Passing None instead of a file yields an empty catalog.
    """

    # Magic number of .mo files
    MAGIC = 0x950412de
    # The same magic number as it appears when a file written in big endian
    # byte order is read as a little endian word.
    BE_MAGIC = 0xde120495

    def __init__(self, fp):
        """Load the catalog from fp, a file object opened in binary mode.

        Raises IOError if the file is not a well-formed .mo file.
        """
        if fp is None:
            d = {}
        else:
            d = self._parse(fp)
        UserDict.__init__(self, d)

    def _parse(self, fp):
        """Override this method to support alternative .mo formats.

        Read all of fp and return a plain dictionary mapping every original
        message to its translation.  Raises IOError(0, 'Bad magic number',
        filename) when the file does not start with the .mo magic number and
        IOError(0, 'File is corrupt', filename) when the header, the index
        tables or a string would extend past the end of the file.
        """
        unpack = struct.unpack
        filename = getattr(fp, 'name', '')
        buf = fp.read()
        buflen = len(buf)
        # The header consists of 5 32 bit words: magic number, version,
        # number of messages, offset of the table of original strings and
        # offset of the table of translations.
        if buflen < 20:
            raise IOError(0, 'File is corrupt', filename)
        # The words are unsigned.  Reading them as signed integers would turn
        # the magic number negative, so it could never equal MAGIC.  The magic
        # number also tells us the byte order of every other word.
        magic = unpack('<I', buf[:4])[0]
        if magic == self.MAGIC:
            order = '<'
        elif magic == self.BE_MAGIC:
            order = '>'
        else:
            raise IOError(0, 'Bad magic number', filename)
        version, msgcount, masteridx, transidx = unpack(
            order + '4I', buf[4:20])
        # Both tables hold one 8 byte (length, offset) pair per message.
        # Checking that they fit up front bounds msgcount by the file size and
        # guarantees that every table read below gets its full 8 bytes.
        tablesize = 8 * msgcount
        if masteridx + tablesize > buflen or transidx + tablesize > buflen:
            raise IOError(0, 'File is corrupt', filename)
        #
        # Now put all messages from the .mo file buffer into the catalog
        # dictionary.
        pair = order + '2I'
        catalog = {}
        for i in range(msgcount):
            entry = 8 * i
            mlen, mstart = unpack(
                pair, buf[masteridx + entry:masteridx + entry + 8])
            tlen, tstart = unpack(
                pair, buf[transidx + entry:transidx + entry + 8])
            mend = mstart + mlen
            tend = tstart + tlen
            # The ends must lie strictly inside the buffer: in the GNU .mo
            # format every string is followed by a terminating NUL byte.
            if mend < buflen and tend < buflen:
                catalog[buf[mstart:mend]] = buf[tstart:tend]
            else:
                raise IOError(0, 'File is corrupt', filename)
        return catalog



# By default, use GNU gettext format .mo files
Translations = GNUTranslations
169
# Locate a .mo file using the gettext strategy
def _find(localedir=None, languages=None, domain=None):
    """Return the message catalog of the first language that has one.

    localedir -- directory containing <lang>/LC_MESSAGES/<domain>.mo files.
                 Defaults to the directory bound to the domain with
                 bindtextdomain(), or to <sys.prefix>/share/locale.
    languages -- sequence of language names, most preferred first.  Defaults
                 to the colon separated value of the first non-empty
                 variable among LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
                 The sequence is never modified.
    domain    -- message domain; defaults to the current global domain.

    The search stops at the language 'C', which stands for "no
    translation".  Returns the cached or freshly loaded Translations object,
    or an empty dictionary when no catalog could be loaded.  Files that are
    missing or are rejected with IOError are skipped silently.
    """
    # Get some reasonable defaults for arguments that were not supplied
    if domain is None:
        domain = _current_domain
    if localedir is None:
        localedir = _localedirs.get(
            domain,
            # TBD: The default localedir is actually system dependent.  I
            # don't know of a good platform-consistent and portable way to
            # default it, so instead, we'll just use sys.prefix.  Most
            # programs should be calling bindtextdomain() or such explicitly
            # anyway.
            os.path.join(sys.prefix, 'share', 'locale'))
    if languages is None:
        languages = []
        for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            val = os.environ.get(envar)
            if val:
                languages = val.split(':')
                break
    # select a language
    #
    # Running off the end of the list means the same as reaching 'C', so
    # there is no need to append a 'C' sentinel to the caller's list.
    for lang in languages:
        if lang == 'C':
            break
        mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
        # see if it's in the cache
        #
        # Compare against None: a catalog without any messages is false in
        # a boolean context but is still a valid cache hit.
        mo = _translations.get(mofile)
        if mo is not None:
            return mo
        t = None
        try:
            fp = open(mofile, 'rb')
            try:
                t = Translations(fp)
            finally:
                fp.close()
        except IOError:
            # No usable catalog for this language; try the next one.
            pass
        if t is not None:
            _translations[mofile] = t
            return t
    return {}
218
219
220
def bindtextdomain(domain=None, localedir=None):
    """Bind a domain to a locale directory, or query its current binding.

    Without a domain nothing is done and None is returned.  With a domain
    but no localedir, return the directory bound to that domain, falling
    back to the directory bound to the domain 'C' (None if there is none).
    With both arguments, record the binding and return localedir.
    """
    if domain is None:
        return None
    if localedir is not None:
        _localedirs[domain] = localedir
        return localedir
    # Pure query: the binding for 'C' serves as the default answer.
    fallback = _localedirs.get('C')
    return _localedirs.get(domain, fallback)
230
231
def textdomain(domain=None):
    """Set the current global domain, or just report it.

    When domain is given it becomes the current global domain.  In either
    case the domain that is current on return is the result.
    """
    global _current_domain
    if domain is not None:
        _current_domain = domain
    return _current_domain
240
241
def gettext(message):
    """Return localized version of a message.

    The translation installed with set() is used when there is one;
    otherwise the catalog is located with _find() using the current global
    domain and the language environment variables.  A message without a
    translation is returned unchanged.
    """
    # Compare against None rather than testing truth: an installed catalog
    # may legitimately be empty.
    catalog = _current_translation
    if catalog is None:
        catalog = _find()
    return catalog.get(message, message)
245
246
def dgettext(domain, message):
    """Translate message using the catalog of the given domain.

    Works like gettext() except that the catalog is located for domain
    instead of the current global domain.  A message without a translation
    is returned unchanged.
    """
    catalog = _find(domain=domain)
    return catalog.get(message, message)
250
251
Barry Warsaw95be23d2000-08-25 19:13:37 +0000252
253# A higher level API
def set(translation):
    """Store translation as the module's current translation object.

    The stored object is what get() returns afterwards.
    """
    global _current_translation
    _current_translation = translation
257
258
def get():
    """Return the module's current translation object.

    This is the object most recently passed to set(), or None if set() has
    not been called.
    """
    # Reading a module global needs no `global' declaration.
    return _current_translation
262
263
def install(domain=None, localedir=None):
    """Install gettext() as `_' in the builtins namespace.

    When a domain is given, it is additionally passed together with
    localedir to bindtextdomain() and then made the current global domain
    with textdomain().
    """
    import __builtin__
    __builtin__.__dict__['_'] = gettext
    if domain is None:
        return
    bindtextdomain(domain, localedir)
    textdomain(domain)