blob: a7f89b2aeab62742698522fd1048266b5e6cb764 [file] [log] [blame]
Barry Warsaw95be23d2000-08-25 19:13:37 +00001"""Internationalization and localization support.
2
3This module provides internationalization (I18N) and localization (L10N)
4support for your Python programs by providing an interface to the GNU gettext
5message catalog library.
6
7I18N refers to the operation by which a program is made aware of multiple
8languages. L10N refers to the adaptation of your program, once
9internationalized, to the local language and cultural habits. In order to
10provide multilingual messages for your Python programs, you need to take the
11following steps:
12
13 - prepare your program by specially marking translatable strings
14 - run a suite of tools over your marked program files to generate raw
15 messages catalogs
16 - create language specific translations of the message catalogs
17 - use this module so that message strings are properly translated
18
19In order to prepare your program for I18N, you need to look at all the strings
20in your program. Any string that needs to be translated should be marked by
21wrapping it in _('...') -- i.e. a call to the function `_'. For example:
22
23 filename = 'mylog.txt'
24 message = _('writing a log message')
25 fp = open(filename, 'w')
26 fp.write(message)
27 fp.close()
28
29In this example, the string `writing a log message' is marked as a candidate
30for translation, while the strings `mylog.txt' and `w' are not.
31
32The GNU gettext package provides a tool, called xgettext, that scans C and C++
33source code looking for these specially marked strings. xgettext generates
34what are called `.pot' files, essentially structured human readable files
35which contain every marked string in the source code. These .pot files are
36copied and handed over to translators who write language-specific versions for
37every supported language.
38
39For I18N Python programs however, xgettext won't work; it doesn't understand
40the myriad of string types supported by Python. The standard Python
41distribution provides a tool called pygettext that does though (found in the
42Tools/i18n directory). This is a command line script that supports a similar
43interface as xgettext; see its documentation for details. Once you've used
44pygettext to create your .pot files, you can use the standard GNU gettext
45tools to generate your machine-readable .mo files, which are what's used by
46this module.
47
48In the simple case, to use this module then, you need only add the following
49bit of code to the main driver file of your application:
50
51 import gettext
52 gettext.install()
53
54This sets everything up so that your _('...') function calls Just Work. In
55other words, it installs `_' in the builtins namespace for convenience. You
56can skip this step and do it manually by the equivalent code:
57
58 import gettext
59 import __builtin__
60 __builtin__.__dict__['_'] = gettext.gettext
61
62Once you've done this, you probably want to call bindtextdomain() and
63textdomain() to get the domain set up properly. Again, for convenience, you
64can pass the domain and localedir to install to set everything up in one fell
65swoop:
66
67 import gettext
68 gettext.install('mydomain', '/my/locale/dir')
69
70If your program needs to support many languages at the same time, you will
71want to create Translation objects explicitly, like so:
72
73 import gettext
74 gettext.install()
75
76 lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo'))
77 lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo'))
78 lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo'))
79
80 gettext.set(lang1)
81 # all _() will now translate to language 1
82 gettext.set(lang2)
83 # all _() will now translate to language 2
84
85Currently, only GNU gettext format binary .mo files are supported.
86
87"""
88
89# This module represents the integration of work from the following authors:
90#
91# Martin von Loewis, who wrote the initial implementation of the underlying
92# C-based libintlmodule (later renamed _gettext), along with a skeletal
93# gettext.py implementation.
94#
95# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
96# which also included a pure-Python implementation to read .mo files if
97# intlmodule wasn't available.
98#
99# James Henstridge, who also wrote a gettext.py module, which has some
100# interesting, but currently unsupported experimental features: the notion of
101# a Catalog class and instances, and the ability to add to a catalog file via
102# a Python API.
103#
104# Barry Warsaw integrated these modules, wrote the .install() API and code,
105# and conformed all C and Python code to Python's coding standards.
106
107import os
108import sys
109import struct
110from UserDict import UserDict
111
112
113
114# globals
115_translations = {}
116_current_translation = None
117_current_domain = 'messages'
118
119# Domain to directory mapping, for use by bindtextdomain()
120_localedirs = {}
121
122
123
class GNUTranslations(UserDict):
    """Message catalog read from a GNU gettext binary .mo file.

    The catalog behaves as a dictionary mapping every original message to
    its translation.  Both are stored exactly as the bytes found in the
    file; no character set conversion is performed.
    """

    # Magic number of .mo files
    MAGIC = 0x950412de

    def __init__(self, fp=None):
        """Build the catalog from fp, a file object opened in binary mode.

        When fp is None (the default) an empty catalog is created.  Raises
        IOError if the file is not a well-formed .mo file.
        """
        if fp is None:
            d = {}
        else:
            d = self._parse(fp)
        UserDict.__init__(self, d)

    def _parse(self, fp):
        """Override this method to support alternative .mo formats.

        Reads all of fp and returns a plain dictionary mapping each message
        to its translation.  Raises IOError with the message 'Bad magic
        number' when the file does not start with MAGIC in either byte
        order, and with 'File is corrupt' when the file is truncated or one
        of its tables or strings lies outside the file.
        """
        unpack = struct.unpack
        filename = getattr(fp, 'name', '')
        buf = fp.read()
        buflen = len(buf)
        # The header consists of 5 unsigned 32 bit words: magic, version,
        # message count, and the offsets of the two index tables.
        if buflen < 20:
            raise IOError(0, 'File is corrupt', filename)
        # The magic number must be read unsigned (it does not fit in a
        # signed 32 bit word); it also tells us the byte order of the file.
        for order in ('<', '>'):
            if unpack(order + 'I', buf[:4])[0] == self.MAGIC:
                break
        else:
            raise IOError(0, 'Bad magic number', filename)
        # The version word is read but not checked.
        version, msgcount, masteridx, transidx = unpack(
            order + '4I', buf[4:20])
        # Each index table holds one (length, offset) pair per message; make
        # sure both tables fit in the file before walking them.
        tablesize = 8 * msgcount
        if masteridx + tablesize > buflen or transidx + tablesize > buflen:
            raise IOError(0, 'File is corrupt', filename)
        #
        # Now put all messages from the .mo file buffer into the catalog
        # dictionary.
        pair = order + '2I'
        catalog = {}
        for i in range(msgcount):
            mlen, mstart = unpack(pair, buf[masteridx:masteridx+8])
            tlen, tstart = unpack(pair, buf[transidx:transidx+8])
            mend = mstart + mlen
            tend = tstart + tlen
            # The .mo format terminates every string with a NUL byte that is
            # not counted in its length, so the end offset has to fall
            # strictly inside the buffer.
            if mend < buflen and tend < buflen:
                catalog[buf[mstart:mend]] = buf[tstart:tend]
            else:
                raise IOError(0, 'File is corrupt', filename)
            #
            # advance to next entry in the seek tables
            masteridx += 8
            transidx += 8
        return catalog



# By default, use GNU gettext format .mo files
Translations = GNUTranslations
169
170# Locate a .mo file using the gettext strategy
171def _find(localedir=None, languages=None, category=None, domain=None):
172 global _current_domain
173 global _localedirs
174
175 # Get some reasonable defaults for arguments that were not supplied
176 if domain is None:
177 domain = _current_domain
178 if category is None:
179 category = 'LC_MESSAGES'
180 if localedir is None:
181 localedir = _localedirs.get(
182 domain,
183 # TBD: The default localedir is actually system dependent. I
184 # don't know of a good platform-consistent and portable way to
185 # default it, so instead, we'll just use sys.prefix. Most
186 # programs should be calling bindtextdomain() or such explicitly
187 # anyway.
188 os.path.join(sys.prefix, 'share', 'locale'))
189 if languages is None:
190 languages = []
191 for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
192 val = os.environ.get(envar)
193 if val:
194 languages = val.split(':')
195 break
196 if 'C' not in languages:
197 languages.append('C')
198 # select a language
199 for lang in languages:
200 if lang == 'C':
201 break
202 mofile = os.path.join(localedir, lang, category, '%s.mo' % domain)
203 # see if it's in the cache
204 mo = _translations.get(mofile)
205 if mo:
206 return mo
207 fp = None
208 try:
209 try:
210 fp = open(mofile, 'rb')
211 t = Translations(fp)
212 _translations[mofile] = t
213 return t
214 except IOError:
215 pass
216 finally:
217 if fp:
218 fp.close()
219 return {}
220
221
222
def bindtextdomain(domain=None, localedir=None):
    """Bind domain to a locale directory, or query an existing binding.

    Without a domain nothing is done and None is returned.  With a domain
    but no localedir, the directory currently bound to the domain is
    returned, falling back to the one bound to 'C' (None if neither is
    bound).  With both arguments the binding is recorded and localedir is
    returned.
    """
    if domain is None:
        return None
    if localedir is not None:
        _localedirs[domain] = localedir
        return localedir
    fallback = _localedirs.get('C')
    return _localedirs.get(domain, fallback)
232
233
def textdomain(domain=None):
    """Set the current global domain when one is given, then return it.

    Called without an argument this only reports the current domain.
    """
    global _current_domain
    if domain is not None:
        _current_domain = domain
    return _current_domain
242
243
def gettext(message):
    """Return localized version of a message.

    When a translation object has been selected with set(), the message is
    looked up in it.  Otherwise the catalog for the current domain and the
    languages named by the environment is used.  The message itself is
    returned when no translation is found.
    """
    catalog = _current_translation
    if catalog is None:
        # nothing selected explicitly: fall back to the domain/environment
        # driven lookup
        catalog = _find()
    return catalog.get(message, message)
247
248
def dgettext(domain, message):
    """Translate message using the catalog of the given domain.

    Works like gettext() except that the domain is named explicitly; the
    message itself is returned when the catalog has no translation for it.
    """
    catalog = _find(domain=domain)
    return catalog.get(message, message)
252
253
def dcgettext(domain, message, category):
    """Like dgettext(), but look up message in the given locale category.

    category is one of the LC_* constants of the locale module; it is
    translated to the name of the matching catalog subdirectory (for
    example 'LC_MESSAGES').  Any other value is handed to the catalog
    search unchanged, so a category name given as a string works too.

    Returns message itself when the locale module is not available or when
    no translation is found.
    """
    try:
        import locale
    except ImportError:
        return message
    # Map the numeric constants to directory names.  Not every platform
    # defines every constant (LC_MESSAGES in particular may be missing), so
    # pick up only the ones that exist rather than giving up entirely.
    categories = {}
    for name in ('LC_CTYPE', 'LC_TIME', 'LC_COLLATE',
                 'LC_MONETARY', 'LC_MESSAGES', 'LC_NUMERIC'):
        value = getattr(locale, name, None)
        if value is not None:
            categories[value] = name
    category = categories.get(category, category)
    return _find(domain=domain, category=category).get(message, message)
269
270
271
# A higher level API
def set(translation):
    """Make translation the current translation object reported by get()."""
    global _current_translation
    _current_translation = translation
276
277
def get():
    """Return the current translation object, as last stored by set()."""
    return _current_translation
281
282
def install(domain=None, localedir=None):
    """Install gettext() under the name `_' in the builtins namespace.

    When a domain is given it is additionally bound to localedir with
    bindtextdomain() and made the current domain with textdomain().
    """
    import __builtin__
    builtin_namespace = __builtin__.__dict__
    builtin_namespace['_'] = gettext
    if domain is None:
        return
    bindtextdomain(domain, localedir)
    textdomain(domain)