|  | #! /usr/bin/env python | 
|  | # Originally written by Barry Warsaw <bwarsaw@python.org> | 
|  | # | 
|  | # minimally patched to make it even more xgettext compatible | 
|  | # by Peter Funk <pf@artcom-gmbh.de> | 
|  |  | 
|  | """pygettext -- Python equivalent of xgettext(1) | 
|  |  | 
|  | Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the | 
|  | internationalization of C programs.  Most of these tools are independent of | 
|  | the programming language and can be used from within Python programs.  Martin | 
|  | von Loewis' work[1] helps considerably in this regard. | 
|  |  | 
|  | There's one problem though; xgettext is the program that scans source code | 
|  | looking for message strings, but it groks only C (or C++).  Python introduces | 
|  | a few wrinkles, such as dual quoting characters, triple quoted strings, and | 
|  | raw strings.  xgettext understands none of this. | 
|  |  | 
|  | Enter pygettext, which uses Python's standard tokenize module to scan Python | 
|  | source code, generating .pot files identical to what GNU xgettext[2] generates | 
|  | for C and C++ code.  From there, the standard GNU tools can be used. | 
|  |  | 
|  | A word about marking Python strings as candidates for translation.  GNU | 
|  | xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and | 
|  | gettext_noop.  But those can be a lot of text to include all over your code. | 
|  | C and C++ have a trick: they use the C preprocessor.  Most internationalized C | 
|  | source includes a #define for gettext() to _() so that what has to be written | 
|  | in the source is much less.  Thus these are both translatable strings: | 
|  |  | 
|  | gettext("Translatable String") | 
|  | _("Translatable String") | 
|  |  | 
|  | Python of course has no preprocessor so this doesn't work so well.  Thus, | 
|  | pygettext searches only for _() by default, but see the -k/--keyword flag | 
|  | below for how to augment this. | 
|  |  | 
|  | [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html | 
|  | [2] http://www.gnu.org/software/gettext/gettext.html | 
|  |  | 
|  | NOTE: pygettext attempts to be option and feature compatible with GNU xgettext | 
|  | where ever possible.  However some options are still missing or are not fully | 
|  | implemented.  Also, xgettext's use of command line switches with option | 
|  | arguments is broken, and in these cases, pygettext just defines additional | 
|  | switches. | 
|  |  | 
|  | Usage: pygettext [options] inputfile ... | 
|  |  | 
|  | Options: | 
|  |  | 
|  | -a | 
|  | --extract-all | 
|  | Extract all strings | 
|  |  | 
|  | -d name | 
|  | --default-domain=name | 
|  | Rename the default output file from messages.pot to name.pot | 
|  |  | 
|  | -E | 
|  | --escape | 
|  | Replace non-ASCII characters with octal escape sequences. | 
|  |  | 
|  | -D | 
|  | --docstrings | 
|  | Extract module, class, method, and function docstrings.  These do not | 
|  | need to be wrapped in _() markers, and in fact cannot be for Python to | 
|  | consider them docstrings. | 
|  |  | 
|  | -h | 
|  | --help | 
|  | print this help message and exit | 
|  |  | 
|  | -k word | 
|  | --keyword=word | 
|  | Keywords to look for in addition to the default set, which are: | 
|  | %(DEFAULTKEYWORDS)s | 
|  |  | 
|  | You can have multiple -k flags on the command line. | 
|  |  | 
|  | -K | 
|  | --no-default-keywords | 
|  | Disable the default set of keywords (see above).  Any keywords | 
|  | explicitly added with the -k/--keyword option are still recognized. | 
|  |  | 
|  | --no-location | 
|  | Do not write filename/lineno location comments. | 
|  |  | 
|  | -n | 
|  | --add-location | 
|  | Write filename/lineno location comments indicating where each | 
|  | extracted string is found in the source.  These lines appear before | 
|  | each msgid.  The style of comments is controlled by the -S/--style | 
|  | option.  This is the default. | 
|  |  | 
|  | -o filename | 
|  | --output=filename | 
|  | Rename the default output file from messages.pot to filename.  If | 
|  | filename is `-' then the output is sent to standard out. | 
|  |  | 
|  | -p dir | 
|  | --output-dir=dir | 
|  | Output files will be placed in directory dir. | 
|  |  | 
|  | -S stylename | 
|  | --style stylename | 
|  | Specify which style to use for location comments.  Two styles are | 
|  | supported: | 
|  |  | 
|  | Solaris  # File: filename, line: line-number | 
|  | GNU      #: filename:line | 
|  |  | 
|  | The style name is case insensitive.  GNU style is the default. | 
|  |  | 
|  | -v | 
|  | --verbose | 
|  | Print the names of the files being processed. | 
|  |  | 
|  | -V | 
|  | --version | 
|  | Print the version of pygettext and exit. | 
|  |  | 
|  | -w columns | 
|  | --width=columns | 
|  | Set width of output to columns. | 
|  |  | 
|  | -x filename | 
|  | --exclude-file=filename | 
|  | Specify a file that contains a list of strings that are not be | 
|  | extracted from the input files.  Each string to be excluded must | 
|  | appear on a line by itself in the file. | 
|  |  | 
|  | If `inputfile' is -, standard input is read. | 
|  |  | 
|  | """ | 
|  |  | 
|  | import os | 
|  | import sys | 
|  | import time | 
|  | import getopt | 
|  | import tokenize | 
|  |  | 
|  | # for selftesting | 
|  | try: | 
|  | import fintl | 
|  | _ = fintl.gettext | 
|  | except ImportError: | 
|  | def _(s): return s | 
|  |  | 
|  | __version__ = '1.3' | 
|  |  | 
|  | default_keywords = ['_'] | 
|  | DEFAULTKEYWORDS = ', '.join(default_keywords) | 
|  |  | 
|  | EMPTYSTRING = '' | 
|  |  | 
|  |  | 
|  |  | 
|  | # The normal pot-file header. msgmerge and EMACS' po-mode work better if | 
|  | # it's there. | 
|  | pot_header = _('''\ | 
|  | # SOME DESCRIPTIVE TITLE. | 
|  | # Copyright (C) YEAR ORGANIZATION | 
|  | # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. | 
|  | # | 
|  | msgid "" | 
|  | msgstr "" | 
|  | "Project-Id-Version: PACKAGE VERSION\\n" | 
|  | "POT-Creation-Date: %(time)s\\n" | 
|  | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n" | 
|  | "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n" | 
|  | "Language-Team: LANGUAGE <LL@li.org>\\n" | 
|  | "MIME-Version: 1.0\\n" | 
|  | "Content-Type: text/plain; charset=CHARSET\\n" | 
|  | "Content-Transfer-Encoding: ENCODING\\n" | 
|  | "Generated-By: pygettext.py %(version)s\\n" | 
|  |  | 
|  | ''') | 
|  |  | 
|  |  | 
|  | def usage(code, msg=''): | 
|  | print >> sys.stderr, _(__doc__) % globals() | 
|  | if msg: | 
|  | print >> sys.stderr, msg | 
|  | sys.exit(code) | 
|  |  | 
|  |  | 
|  |  | 
|  | escapes = [] | 
|  |  | 
|  | def make_escapes(pass_iso8859): | 
|  | global escapes | 
|  | if pass_iso8859: | 
|  | # Allow iso-8859 characters to pass through so that e.g. 'msgid | 
|  | # "Höhe"' would result not result in 'msgid "H\366he"'.  Otherwise we | 
|  | # escape any character outside the 32..126 range. | 
|  | mod = 128 | 
|  | else: | 
|  | mod = 256 | 
|  | for i in range(256): | 
|  | if 32 <= (i % mod) <= 126: | 
|  | escapes.append(chr(i)) | 
|  | else: | 
|  | escapes.append("\\%03o" % i) | 
|  | escapes[ord('\\')] = '\\\\' | 
|  | escapes[ord('\t')] = '\\t' | 
|  | escapes[ord('\r')] = '\\r' | 
|  | escapes[ord('\n')] = '\\n' | 
|  | escapes[ord('\"')] = '\\"' | 
|  |  | 
|  |  | 
|  | def escape(s): | 
|  | global escapes | 
|  | s = list(s) | 
|  | for i in range(len(s)): | 
|  | s[i] = escapes[ord(s[i])] | 
|  | return EMPTYSTRING.join(s) | 
|  |  | 
|  |  | 
|  | def safe_eval(s): | 
|  | # unwrap quotes, safely | 
|  | return eval(s, {'__builtins__':{}}, {}) | 
|  |  | 
|  |  | 
|  | def normalize(s): | 
|  | # This converts the various Python string types into a format that is | 
|  | # appropriate for .po files, namely much closer to C style. | 
|  | lines = s.split('\n') | 
|  | if len(lines) == 1: | 
|  | s = '"' + escape(s) + '"' | 
|  | else: | 
|  | if not lines[-1]: | 
|  | del lines[-1] | 
|  | lines[-1] = lines[-1] + '\n' | 
|  | for i in range(len(lines)): | 
|  | lines[i] = escape(lines[i]) | 
|  | lineterm = '\\n"\n"' | 
|  | s = '""\n"' + lineterm.join(lines) + '"' | 
|  | return s | 
|  |  | 
|  |  | 
|  |  | 
|  | class TokenEater: | 
|  | def __init__(self, options): | 
|  | self.__options = options | 
|  | self.__messages = {} | 
|  | self.__state = self.__waiting | 
|  | self.__data = [] | 
|  | self.__lineno = -1 | 
|  | self.__freshmodule = 1 | 
|  |  | 
|  | def __call__(self, ttype, tstring, stup, etup, line): | 
|  | # dispatch | 
|  | ##        import token | 
|  | ##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \ | 
|  | ##              'tstring:', tstring | 
|  | self.__state(ttype, tstring, stup[0]) | 
|  |  | 
|  | def __waiting(self, ttype, tstring, lineno): | 
|  | # Do docstring extractions, if enabled | 
|  | if self.__options.docstrings: | 
|  | # module docstring? | 
|  | if self.__freshmodule: | 
|  | if ttype == tokenize.STRING: | 
|  | self.__addentry(safe_eval(tstring), lineno) | 
|  | self.__freshmodule = 0 | 
|  | elif ttype not in (tokenize.COMMENT, tokenize.NL): | 
|  | self.__freshmodule = 0 | 
|  | return | 
|  | # class docstring? | 
|  | if ttype == tokenize.NAME and tstring in ('class', 'def'): | 
|  | self.__state = self.__suiteseen | 
|  | return | 
|  | if ttype == tokenize.NAME and tstring in self.__options.keywords: | 
|  | self.__state = self.__keywordseen | 
|  |  | 
|  | def __suiteseen(self, ttype, tstring, lineno): | 
|  | # ignore anything until we see the colon | 
|  | if ttype == tokenize.OP and tstring == ':': | 
|  | self.__state = self.__suitedocstring | 
|  |  | 
|  | def __suitedocstring(self, ttype, tstring, lineno): | 
|  | # ignore any intervening noise | 
|  | if ttype == tokenize.STRING: | 
|  | self.__addentry(safe_eval(tstring), lineno) | 
|  | self.__state = self.__waiting | 
|  | elif ttype not in (tokenize.NEWLINE, tokenize.INDENT, | 
|  | tokenize.COMMENT): | 
|  | # there was no class docstring | 
|  | self.__state = self.__waiting | 
|  |  | 
|  | def __keywordseen(self, ttype, tstring, lineno): | 
|  | if ttype == tokenize.OP and tstring == '(': | 
|  | self.__data = [] | 
|  | self.__lineno = lineno | 
|  | self.__state = self.__openseen | 
|  | else: | 
|  | self.__state = self.__waiting | 
|  |  | 
|  | def __openseen(self, ttype, tstring, lineno): | 
|  | if ttype == tokenize.OP and tstring == ')': | 
|  | # We've seen the last of the translatable strings.  Record the | 
|  | # line number of the first line of the strings and update the list | 
|  | # of messages seen.  Reset state for the next batch.  If there | 
|  | # were no strings inside _(), then just ignore this entry. | 
|  | if self.__data: | 
|  | self.__addentry(EMPTYSTRING.join(self.__data)) | 
|  | self.__state = self.__waiting | 
|  | elif ttype == tokenize.STRING: | 
|  | self.__data.append(safe_eval(tstring)) | 
|  | # TBD: should we warn if we seen anything else? | 
|  |  | 
|  | def __addentry(self, msg, lineno=None): | 
|  | if lineno is None: | 
|  | lineno = self.__lineno | 
|  | if not msg in self.__options.toexclude: | 
|  | entry = (self.__curfile, lineno) | 
|  | self.__messages.setdefault(msg, []).append(entry) | 
|  |  | 
|  | def set_filename(self, filename): | 
|  | self.__curfile = filename | 
|  |  | 
|  | def write(self, fp): | 
|  | options = self.__options | 
|  | timestamp = time.ctime(time.time()) | 
|  | # The time stamp in the header doesn't have the same format as that | 
|  | # generated by xgettext... | 
|  | print >> fp, pot_header % {'time': timestamp, 'version': __version__} | 
|  | for k, v in self.__messages.items(): | 
|  | if not options.writelocations: | 
|  | pass | 
|  | # location comments are different b/w Solaris and GNU: | 
|  | elif options.locationstyle == options.SOLARIS: | 
|  | for filename, lineno in v: | 
|  | d = {'filename': filename, 'lineno': lineno} | 
|  | print >>fp, _('# File: %(filename)s, line: %(lineno)d') % d | 
|  | elif options.locationstyle == options.GNU: | 
|  | # fit as many locations on one line, as long as the | 
|  | # resulting line length doesn't exceeds 'options.width' | 
|  | locline = '#:' | 
|  | for filename, lineno in v: | 
|  | d = {'filename': filename, 'lineno': lineno} | 
|  | s = _(' %(filename)s:%(lineno)d') % d | 
|  | if len(locline) + len(s) <= options.width: | 
|  | locline = locline + s | 
|  | else: | 
|  | print >> fp, locline | 
|  | locline = "#:" + s | 
|  | if len(locline) > 2: | 
|  | print >> fp, locline | 
|  | # TBD: sorting, normalizing | 
|  | print >> fp, 'msgid', normalize(k) | 
|  | print >> fp, 'msgstr ""\n' | 
|  |  | 
|  |  | 
|  |  | 
|  | def main(): | 
|  | global default_keywords | 
|  | try: | 
|  | opts, args = getopt.getopt( | 
|  | sys.argv[1:], | 
|  | 'ad:DEhk:Kno:p:S:Vvw:x:', | 
|  | ['extract-all', 'default-domain', 'escape', 'help', | 
|  | 'keyword=', 'no-default-keywords', | 
|  | 'add-location', 'no-location', 'output=', 'output-dir=', | 
|  | 'style=', 'verbose', 'version', 'width=', 'exclude-file=', | 
|  | 'docstrings', | 
|  | ]) | 
|  | except getopt.error, msg: | 
|  | usage(1, msg) | 
|  |  | 
|  | # for holding option values | 
|  | class Options: | 
|  | # constants | 
|  | GNU = 1 | 
|  | SOLARIS = 2 | 
|  | # defaults | 
|  | extractall = 0 # FIXME: currently this option has no effect at all. | 
|  | escape = 0 | 
|  | keywords = [] | 
|  | outpath = '' | 
|  | outfile = 'messages.pot' | 
|  | writelocations = 1 | 
|  | locationstyle = GNU | 
|  | verbose = 0 | 
|  | width = 78 | 
|  | excludefilename = '' | 
|  | docstrings = 0 | 
|  |  | 
|  | options = Options() | 
|  | locations = {'gnu' : options.GNU, | 
|  | 'solaris' : options.SOLARIS, | 
|  | } | 
|  |  | 
|  | # parse options | 
|  | for opt, arg in opts: | 
|  | if opt in ('-h', '--help'): | 
|  | usage(0) | 
|  | elif opt in ('-a', '--extract-all'): | 
|  | options.extractall = 1 | 
|  | elif opt in ('-d', '--default-domain'): | 
|  | options.outfile = arg + '.pot' | 
|  | elif opt in ('-E', '--escape'): | 
|  | options.escape = 1 | 
|  | elif opt in ('-D', '--docstrings'): | 
|  | options.docstrings = 1 | 
|  | elif opt in ('-k', '--keyword'): | 
|  | options.keywords.append(arg) | 
|  | elif opt in ('-K', '--no-default-keywords'): | 
|  | default_keywords = [] | 
|  | elif opt in ('-n', '--add-location'): | 
|  | options.writelocations = 1 | 
|  | elif opt in ('--no-location',): | 
|  | options.writelocations = 0 | 
|  | elif opt in ('-S', '--style'): | 
|  | options.locationstyle = locations.get(arg.lower()) | 
|  | if options.locationstyle is None: | 
|  | usage(1, _('Invalid value for --style: %s') % arg) | 
|  | elif opt in ('-o', '--output'): | 
|  | options.outfile = arg | 
|  | elif opt in ('-p', '--output-dir'): | 
|  | options.outpath = arg | 
|  | elif opt in ('-v', '--verbose'): | 
|  | options.verbose = 1 | 
|  | elif opt in ('-V', '--version'): | 
|  | print _('pygettext.py (xgettext for Python) %s') % __version__ | 
|  | sys.exit(0) | 
|  | elif opt in ('-w', '--width'): | 
|  | try: | 
|  | options.width = int(arg) | 
|  | except ValueError: | 
|  | usage(1, _('--width argument must be an integer: %s') % arg) | 
|  | elif opt in ('-x', '--exclude-file'): | 
|  | options.excludefilename = arg | 
|  |  | 
|  | # calculate escapes | 
|  | make_escapes(options.escape) | 
|  |  | 
|  | # calculate all keywords | 
|  | options.keywords.extend(default_keywords) | 
|  |  | 
|  | # initialize list of strings to exclude | 
|  | if options.excludefilename: | 
|  | try: | 
|  | fp = open(options.excludefilename) | 
|  | options.toexclude = fp.readlines() | 
|  | fp.close() | 
|  | except IOError: | 
|  | sys.stderr.write(_("Can't read --exclude-file: %s") % | 
|  | options.excludefilename) | 
|  | sys.exit(1) | 
|  | else: | 
|  | options.toexclude = [] | 
|  |  | 
|  | # slurp through all the files | 
|  | eater = TokenEater(options) | 
|  | for filename in args: | 
|  | if filename == '-': | 
|  | if options.verbose: | 
|  | print _('Reading standard input') | 
|  | fp = sys.stdin | 
|  | closep = 0 | 
|  | else: | 
|  | if options.verbose: | 
|  | print _('Working on %s') % filename | 
|  | fp = open(filename) | 
|  | closep = 1 | 
|  | try: | 
|  | eater.set_filename(filename) | 
|  | try: | 
|  | tokenize.tokenize(fp.readline, eater) | 
|  | except tokenize.TokenError, e: | 
|  | sys.stderr.write('%s: %s, line %d, column %d\n' % | 
|  | (e[0], filename, e[1][0], e[1][1])) | 
|  | finally: | 
|  | if closep: | 
|  | fp.close() | 
|  |  | 
|  | # write the output | 
|  | if options.outfile == '-': | 
|  | fp = sys.stdout | 
|  | closep = 0 | 
|  | else: | 
|  | if options.outpath: | 
|  | options.outfile = os.path.join(options.outpath, options.outfile) | 
|  | fp = open(options.outfile, 'w') | 
|  | closep = 1 | 
|  | try: | 
|  | eater.write(fp) | 
|  | finally: | 
|  | if closep: | 
|  | fp.close() | 
|  |  | 
|  |  | 
|  | if __name__ == '__main__': | 
|  | main() | 
|  | # some more test strings | 
|  | _(u'a unicode string') |