| """Internationalization and localization support. | 
 |  | 
 | This module provides internationalization (I18N) and localization (L10N) | 
 | support for your Python programs by providing an interface to the GNU gettext | 
 | message catalog library. | 
 |  | 
 | I18N refers to the operation by which a program is made aware of multiple | 
 | languages.  L10N refers to the adaptation of your program, once | 
 | internationalized, to the local language and cultural habits. | 
 |  | 
 | """ | 
 |  | 
 | # This module represents the integration of work, contributions, feedback, and | 
 | # suggestions from the following people: | 
 | # | 
 | # Martin von Loewis, who wrote the initial implementation of the underlying | 
 | # C-based libintlmodule (later renamed _gettext), along with a skeletal | 
 | # gettext.py implementation. | 
 | # | 
 | # Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule, | 
 | # which also included a pure-Python implementation to read .mo files if | 
 | # intlmodule wasn't available. | 
 | # | 
 | # James Henstridge, who also wrote a gettext.py module, which has some | 
 | # interesting, but currently unsupported experimental features: the notion of | 
 | # a Catalog class and instances, and the ability to add to a catalog file via | 
 | # a Python API. | 
 | # | 
 | # Barry Warsaw integrated these modules, wrote the .install() API and code, | 
 | # and conformed all C and Python code to Python's coding standards. | 
 | # | 
 | # Francois Pinard and Marc-Andre Lemburg also contributed valuably to this | 
 | # module. | 
 | # | 
 | # TODO: | 
 | # - Lazy loading of .mo files.  Currently the entire catalog is loaded into | 
 | #   memory, but that's probably bad for large translated programs.  Instead, | 
 | #   the lexical sort of original strings in GNU .mo files should be exploited | 
 | #   to do binary searches and lazy initializations.  Or you might want to use | 
 | #   the undocumented double-hash algorithm for .mo files with hash tables, but | 
 | #   you'll need to study the GNU gettext code to do this. | 
 | # | 
 | # - Support Solaris .mo file formats.  Unfortunately, we've been unable to | 
 | #   find this format documented anywhere. | 
 |  | 
 | import os | 
 | import sys | 
 | import struct | 
 | import copy | 
 | from errno import ENOENT | 
 |  | 
 | __all__ = ["bindtextdomain","textdomain","gettext","dgettext", | 
 |            "find","translation","install","Catalog"] | 
 |  | 
 | _default_localedir = os.path.join(sys.prefix, 'share', 'locale') | 
 |  | 
 |  | 
 |  | 
 | def _expand_lang(locale): | 
 |     from locale import normalize | 
 |     locale = normalize(locale) | 
 |     COMPONENT_CODESET   = 1 << 0 | 
 |     COMPONENT_TERRITORY = 1 << 1 | 
 |     COMPONENT_MODIFIER  = 1 << 2 | 
 |     # split up the locale into its base components | 
 |     mask = 0 | 
 |     pos = locale.find('@') | 
 |     if pos >= 0: | 
 |         modifier = locale[pos:] | 
 |         locale = locale[:pos] | 
 |         mask |= COMPONENT_MODIFIER | 
 |     else: | 
 |         modifier = '' | 
 |     pos = locale.find('.') | 
 |     if pos >= 0: | 
 |         codeset = locale[pos:] | 
 |         locale = locale[:pos] | 
 |         mask |= COMPONENT_CODESET | 
 |     else: | 
 |         codeset = '' | 
 |     pos = locale.find('_') | 
 |     if pos >= 0: | 
 |         territory = locale[pos:] | 
 |         locale = locale[:pos] | 
 |         mask |= COMPONENT_TERRITORY | 
 |     else: | 
 |         territory = '' | 
 |     language = locale | 
 |     ret = [] | 
 |     for i in range(mask+1): | 
 |         if not (i & ~mask):  # if all components for this combo exist ... | 
 |             val = language | 
 |             if i & COMPONENT_TERRITORY: val += territory | 
 |             if i & COMPONENT_CODESET:   val += codeset | 
 |             if i & COMPONENT_MODIFIER:  val += modifier | 
 |             ret.append(val) | 
 |     ret.reverse() | 
 |     return ret | 
 |  | 
 |  | 
 |  | 
 | class NullTranslations: | 
 |     def __init__(self, fp=None): | 
 |         self._info = {} | 
 |         self._charset = None | 
 |         self._fallback = None | 
 |         if fp is not None: | 
 |             self._parse(fp) | 
 |  | 
 |     def _parse(self, fp): | 
 |         pass | 
 |  | 
 |     def add_fallback(self, fallback): | 
 |         if self._fallback: | 
 |             self._fallback.add_fallback(fallback) | 
 |         else: | 
 |             self._fallback = fallback | 
 |  | 
 |     def gettext(self, message): | 
 |         if self._fallback: | 
 |             return self._fallback.gettext(message) | 
 |         return message | 
 |  | 
 |     def ugettext(self, message): | 
 |         if self._fallback: | 
 |             return self._fallback.ugettext(message) | 
 |         return unicode(message) | 
 |  | 
 |     def info(self): | 
 |         return self._info | 
 |  | 
 |     def charset(self): | 
 |         return self._charset | 
 |  | 
 |     def install(self, unicode=0): | 
 |         import __builtin__ | 
 |         __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext | 
 |  | 
 |  | 
 | class GNUTranslations(NullTranslations): | 
 |     # Magic number of .mo files | 
 |     LE_MAGIC = 0x950412de | 
 |     BE_MAGIC = 0xde120495 | 
 |  | 
 |     def _parse(self, fp): | 
 |         """Override this method to support alternative .mo formats.""" | 
 |         # We need to & all 32 bit unsigned integers with 0xffffffff for | 
 |         # portability to 64 bit machines. | 
 |         MASK = 0xffffffff | 
 |         unpack = struct.unpack | 
 |         filename = getattr(fp, 'name', '') | 
 |         # Parse the .mo file header, which consists of 5 little endian 32 | 
 |         # bit words. | 
 |         self._catalog = catalog = {} | 
 |         buf = fp.read() | 
 |         buflen = len(buf) | 
 |         # Are we big endian or little endian? | 
 |         magic = unpack('<i', buf[:4])[0] & MASK | 
 |         if magic == self.LE_MAGIC: | 
 |             version, msgcount, masteridx, transidx = unpack('<4i', buf[4:20]) | 
 |             ii = '<ii' | 
 |         elif magic == self.BE_MAGIC: | 
 |             version, msgcount, masteridx, transidx = unpack('>4i', buf[4:20]) | 
 |             ii = '>ii' | 
 |         else: | 
 |             raise IOError(0, 'Bad magic number', filename) | 
 |         # more unsigned ints | 
 |         msgcount &= MASK | 
 |         masteridx &= MASK | 
 |         transidx &= MASK | 
 |         # Now put all messages from the .mo file buffer into the catalog | 
 |         # dictionary. | 
 |         for i in xrange(0, msgcount): | 
 |             mlen, moff = unpack(ii, buf[masteridx:masteridx+8]) | 
 |             moff &= MASK | 
 |             mend = moff + (mlen & MASK) | 
 |             tlen, toff = unpack(ii, buf[transidx:transidx+8]) | 
 |             toff &= MASK | 
 |             tend = toff + (tlen & MASK) | 
 |             if mend < buflen and tend < buflen: | 
 |                 tmsg = buf[toff:tend] | 
 |                 catalog[buf[moff:mend]] = tmsg | 
 |             else: | 
 |                 raise IOError(0, 'File is corrupt', filename) | 
 |             # See if we're looking at GNU .mo conventions for metadata | 
 |             if mlen == 0 and tmsg.lower().startswith('project-id-version:'): | 
 |                 # Catalog description | 
 |                 for item in tmsg.split('\n'): | 
 |                     item = item.strip() | 
 |                     if not item: | 
 |                         continue | 
 |                     k, v = item.split(':', 1) | 
 |                     k = k.strip().lower() | 
 |                     v = v.strip() | 
 |                     self._info[k] = v | 
 |                     if k == 'content-type': | 
 |                         self._charset = v.split('charset=')[1] | 
 |             # advance to next entry in the seek tables | 
 |             masteridx += 8 | 
 |             transidx += 8 | 
 |  | 
 |     def gettext(self, message): | 
 |         try: | 
 |             return self._catalog[message] | 
 |         except KeyError: | 
 |             if self._fallback: | 
 |                 return self._fallback.gettext(message) | 
 |             return message | 
 |  | 
 |     def ugettext(self, message): | 
 |         try: | 
 |             tmsg = self._catalog[message] | 
 |         except KeyError: | 
 |             if self._fallback: | 
 |                 return self._fallback.ugettext(message) | 
 |             tmsg = message | 
 |         return unicode(tmsg, self._charset) | 
 |  | 
 |  | 
 |  | 
 | # Locate a .mo file using the gettext strategy | 
 | def find(domain, localedir=None, languages=None, all=0): | 
 |     # Get some reasonable defaults for arguments that were not supplied | 
 |     if localedir is None: | 
 |         localedir = _default_localedir | 
 |     if languages is None: | 
 |         languages = [] | 
 |         for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'): | 
 |             val = os.environ.get(envar) | 
 |             if val: | 
 |                 languages = val.split(':') | 
 |                 break | 
 |         if 'C' not in languages: | 
 |             languages.append('C') | 
 |     # now normalize and expand the languages | 
 |     nelangs = [] | 
 |     for lang in languages: | 
 |         for nelang in _expand_lang(lang): | 
 |             if nelang not in nelangs: | 
 |                 nelangs.append(nelang) | 
 |     # select a language | 
 |     if all: | 
 |         result = [] | 
 |     else: | 
 |         result = None | 
 |     for lang in nelangs: | 
 |         if lang == 'C': | 
 |             break | 
 |         mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain) | 
 |         if os.path.exists(mofile): | 
 |             if all: | 
 |                 result.append(mofile) | 
 |             else: | 
 |                 return mofile | 
 |     return result | 
 |  | 
 |  | 
 |  | 
 | # a mapping between absolute .mo file path and Translation object | 
 | _translations = {} | 
 |  | 
 | def translation(domain, localedir=None, languages=None, | 
 |                 class_=None, fallback=0): | 
 |     if class_ is None: | 
 |         class_ = GNUTranslations | 
 |     mofiles = find(domain, localedir, languages, all=1) | 
 |     if len(mofiles)==0: | 
 |         if fallback: | 
 |             return NullTranslations() | 
 |         raise IOError(ENOENT, 'No translation file found for domain', domain) | 
 |     # TBD: do we need to worry about the file pointer getting collected? | 
 |     # Avoid opening, reading, and parsing the .mo file after it's been done | 
 |     # once. | 
 |     result = None | 
 |     for mofile in mofiles: | 
 |         key = os.path.abspath(mofile) | 
 |         t = _translations.get(key) | 
 |         if t is None: | 
 |             t = _translations.setdefault(key, class_(open(mofile, 'rb'))) | 
 |         # Copy the translation object to allow setting fallbacks. | 
 |         # All other instance data is shared with the cached object. | 
 |         t = copy.copy(t) | 
 |         if result is None: | 
 |             result = t | 
 |         else: | 
 |             result.add_fallback(t) | 
 |     return result | 
 |  | 
 |  | 
 | def install(domain, localedir=None, unicode=0): | 
 |     translation(domain, localedir, fallback=1).install(unicode) | 
 |  | 
 |  | 
 |  | 
# Mapping from domain name to the locale directory registered for it with
# bindtextdomain().
_localedirs = {}
# Current global domain, read and changed by textdomain() and used by
# gettext().  `messages' is used for compatibility with GNU gettext.
_current_domain = 'messages'
 |  | 
 |  | 
 | def textdomain(domain=None): | 
 |     global _current_domain | 
 |     if domain is not None: | 
 |         _current_domain = domain | 
 |     return _current_domain | 
 |  | 
 |  | 
 | def bindtextdomain(domain, localedir=None): | 
 |     global _localedirs | 
 |     if localedir is not None: | 
 |         _localedirs[domain] = localedir | 
 |     return _localedirs.get(domain, _default_localedir) | 
 |  | 
 |  | 
 | def dgettext(domain, message): | 
 |     try: | 
 |         t = translation(domain, _localedirs.get(domain, None)) | 
 |     except IOError: | 
 |         return message | 
 |     return t.gettext(message) | 
 |  | 
 |  | 
 | def gettext(message): | 
 |     return dgettext(_current_domain, message) | 
 |  | 
 |  | 
 | # dcgettext() has been deemed unnecessary and is not implemented. | 
 |  | 
# James Henstridge's Catalog constructor from GNOME gettext.  Documented usage
# was:
#
#    import gettext
#    cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
#    _ = cat.gettext
#    print _('Hello World')

# The resulting catalog object currently doesn't support access through a
# dictionary API, which was supported (but apparently unused) in GNOME
# gettext.

# Catalog is simply another name for translation().
Catalog = translation