Blame - Tools/i18n/pygettext.py - platform/external/python/cpython3

blob: 3542f3f23ec2cd0f8f861935bb8a589c02146448 [file] [log] [blame]

Barry Warsaw	af57251	1999-08-11 21:40:38 +0000	[diff] [blame]	1	#! /usr/bin/env python
Barry Warsaw	e27db5a	1999-08-13 20:59:48 +0000	[diff] [blame^]	2
				3	"""pygettext -- Python equivalent of xgettext(1)
				4
				5	Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
				6	internationalization of C programs. Most of these tools are independent of
				7	the programming language and can be used from within Python programs. Martin
				8	von Loewis' work[1] helps considerably in this regard.
				9
				10	There's one hole though; xgettext is the program that scans source code
				11	looking for message strings, but it groks only C (or C++). Python introduces
				12	a few wrinkles, such as dual quoting characters, triple quoted strings, and
				13	raw strings. xgettext understands none of this.
				14
				15	Enter pygettext, which uses Python's standard tokenize module to scan Python
				16	source code, generating .pot files identical to what GNU xgettext[2] generates
				17	for C and C++ code. From there, the standard GNU tools can be used.
				18
				19	A word about marking Python strings as candidates for translation. GNU
				20	xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
				21	gettext_noop. But those can be a lot of text to include all over your code.
				22	C and C++ have a trick: they use the C preprocessor. Most internationalized C
				23	source includes a #define for gettext() to _() so that what has to be written
				24	in the source is much less. Thus these are both translatable strings:
				25
				26	gettext("Translatable String")
				27	_("Translatable String")
				28
				29	Python of course has no preprocessor so this doesn't work so well. Thus,
				30	pygettext searches only for _() by default, but see the -k/--keyword flag
				31	below for how to augment this.
				32
				33	[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
				34	[2] http://www.gnu.org/software/gettext/gettext.html
				35
				36
				37	NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
				38	wherever possible.
				39
				40	Usage: pygettext [options] filename ...
				41
				42	Options:
				43
				44	-a
				45	--extract-all
				46	Extract all strings
				47
				48	-d default-domain
				49	--default-domain=default-domain
				50	Rename the default output file from messages.pot to default-domain.pot
				51
				52	-k [word]
				53	--keyword[=word]
				54	Additional keywords to look for. Without `word' means not to use the
				55	default keywords. The default keywords, which are always looked for
				56	if not explicitly disabled: _
				57
				58	The default keyword list is different than GNU xgettext. You can have
				59	multiple -k flags on the command line.
				60
				61	--no-location
				62	Do not write filename/lineno location comments
				63
				64	-n [style]
				65	--add-location[=style]
				66	Write filename/lineno location comments indicating where each
				67	extracted string is found in the source. These lines appear before
				68	each msgid. Two styles are supported:
				69
				70	Solaris # File: filename, line: line-number
				71	Gnu #: filename:line
				72
				73	If style is omitted, Gnu is used. The style name is case
				74	insensitive. By default, locations are included.
				75
				76	--help
				77	-h
				78	print this help message and exit
				79
				80	"""
				81
				82	import os
				83	import sys
				84	import string
				85	import time
				86	import getopt
				87	import tokenize
				88
				89	__version__ = '0.1'
				90
				91
				92
				93	def usage(code, msg=''):
				94	print __doc__ % globals()
				95	if msg:
				96	print msg
				97	sys.exit(code)
				98
				99
				100
				101	def normalize(s):
				102	# This converts the various Python string types into a format that is
				103	# appropriate for .po files, namely much closer to C style.
				104	#
				105	# unwrap quotes, safely
				106	s = eval(s, {'__builtins__':{}}, {})
				107	# now escape any embedded double quotes
				108	parts = []
				109	last = 0
				110	i = string.find(s, '"')
				111	while i >= 0:
				112	# find the number of preceding backslashes
				113	j = i
				114	n = 0
				115	while j >= 0 and s[i] == '\\':
				116	j = j - 1
				117	n = n + 1
				118	if (n % 2) == 0:
				119	parts.append(s[last:j])
				120	parts.append('\\')
				121	parts.append(s[j:i])
				122	else:
				123	parts.append(s[last:i])
				124	last = i
				125	i = string.find(s, '"', i+1)
				126	else:
				127	parts.append(s[last:])
				128	if parts:
				129	return '"' + string.join(parts, '') + '"'
				130	else:
				131	return '"' + s + '"'
				132
				133
				134
				135	class TokenEater:
				136	def __init__(self, options):
				137	self.__options = options
				138	self.__messages = {}
				139	self.__state = self.__waiting
				140	self.__data = []
				141	self.__lineno = -1
				142
				143	def __call__(self, ttype, tstring, stup, etup, line):
				144	# dispatch
				145	self.__state(ttype, tstring, stup[0])
				146
				147	def __waiting(self, ttype, tstring, lineno):
				148	if ttype == tokenize.NAME and tstring in self.__options.keywords:
				149	self.__state = self.__keywordseen
				150
				151	def __keywordseen(self, ttype, tstring, lineno):
				152	if ttype == tokenize.OP and tstring == '(':
				153	self.__data = []
				154	self.__lineno = lineno
				155	self.__state = self.__openseen
				156	else:
				157	self.__state = self.__waiting
				158
				159	def __openseen(self, ttype, tstring, lineno):
				160	if ttype == tokenize.OP and tstring == ')':
				161	# We've seen the last of the translatable strings. Record the
				162	# line number of the first line of the strings and update the list
				163	# of messages seen. Reset state for the next batch. If there
				164	# were no strings inside _(), then just ignore this entry.
				165	if self.__data:
				166	msg = string.join(self.__data, '')
				167	entry = (self.__curfile, self.__lineno)
				168	linenos = self.__messages.get(msg)
				169	if linenos is None:
				170	self.__messages[msg] = [entry]
				171	else:
				172	linenos.append(entry)
				173	self.__state = self.__waiting
				174	elif ttype == tokenize.STRING:
				175	self.__data.append(normalize(tstring))
				176	# TBD: should we warn if we seen anything else?
				177
				178	def set_filename(self, filename):
				179	self.__curfile = filename
				180
				181	def write(self, fp):
				182	options = self.__options
				183	timestamp = time.ctime(time.time())
				184	# common header
				185	try:
				186	sys.stdout = fp
				187	print '# POT file generated by pygettext.py', __version__
				188	print '#', timestamp
				189	print '#'
				190	for k, v in self.__messages.items():
				191	for filename, lineno in v:
				192	# location comments are different b/w Solaris and GNU
				193	if options.location == options.SOLARIS:
				194	print '# File: %s,' % filename, 'line: %d' % lineno
				195	elif options.location == options.GNU:
				196	print '#: %s:%d' % (filename, lineno)
				197	# TBD: sorting, normalizing
				198	print 'msgid', k
				199	print 'msgstr '
				200	print
				201	finally:
				202	sys.stdout = sys.__stdout__
				203
				204
				205	def main():
				206	default_keywords = ['_']
				207	try:
				208	opts, args = getopt.getopt(
				209	sys.argv[1:],
				210	'k:d:n:h',
				211	['keyword', 'default-domain', 'help',
				212	'add-location=', 'no-location'])
				213	except getopt.error, msg:
				214	usage(1, msg)
				215
				216	# for holding option values
				217	class Options:
				218	# constants
				219	GNU = 1
				220	SOLARIS = 2
				221	# defaults
				222	keywords = []
				223	outfile = 'messages.pot'
				224	location = GNU
				225
				226	options = Options()
				227	locations = {'gnu' : options.GNU,
				228	'solaris' : options.SOLARIS,
				229	}
				230
				231	# parse options
				232	for opt, arg in opts:
				233	if opt in ('-h', '--help'):
				234	usage(0)
				235	elif opt in ('-k', '--keyword'):
				236	if arg is None:
				237	default_keywords = []
				238	options.keywords.append(arg)
				239	elif opt in ('-d', '--default-domain'):
				240	options.outfile = arg + '.pot'
				241	elif opt in ('-n', '--add-location'):
				242	if arg is None:
				243	arg = 'gnu'
				244	try:
				245	options.location = locations[string.lower(arg)]
				246	except KeyError:
				247	usage(1, 'Invalid value for --add-location: ' + arg)
				248	elif opt in ('--no-location',):
				249	options.location = 0
				250
				251	# calculate all keywords
				252	options.keywords.extend(default_keywords)
				253
				254	# slurp through all the files
				255	eater = TokenEater(options)
				256	for filename in args:
				257	fp = open(filename)
				258	eater.set_filename(filename)
				259	tokenize.tokenize(fp.readline, eater)
				260	fp.close()
				261
				262	fp = open(options.outfile, 'w')
				263	eater.write(fp)
				264	fp.close()
				265
				266
				267
				268	if __name__ == '__main__':
				269	main()