Blame - Tools/i18n/pygettext.py - platform/external/python/cpython3

blob: 2a3c97b902f7a9800bf0c45106fa2f2cf5728658 [file] [log] [blame]

Barry Warsaw	af57251	1999-08-11 21:40:38 +0000	[diff] [blame]	1	#! /usr/bin/env python
Barry Warsaw	a507c32	1999-11-03 16:46:05 +0000	[diff] [blame^]	2	# Originally written by Barry Warsaw <bwarsaw@python.org>
Barry Warsaw	e27db5a	1999-08-13 20:59:48 +0000	[diff] [blame]	3
				4	"""pygettext -- Python equivalent of xgettext(1)
				5
				6	Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
				7	internationalization of C programs. Most of these tools are independent of
				8	the programming language and can be used from within Python programs. Martin
				9	von Loewis' work[1] helps considerably in this regard.
				10
				11	There's one hole though; xgettext is the program that scans source code
				12	looking for message strings, but it groks only C (or C++). Python introduces
				13	a few wrinkles, such as dual quoting characters, triple quoted strings, and
				14	raw strings. xgettext understands none of this.
				15
				16	Enter pygettext, which uses Python's standard tokenize module to scan Python
				17	source code, generating .pot files identical to what GNU xgettext[2] generates
				18	for C and C++ code. From there, the standard GNU tools can be used.
				19
				20	A word about marking Python strings as candidates for translation. GNU
				21	xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
				22	gettext_noop. But those can be a lot of text to include all over your code.
				23	C and C++ have a trick: they use the C preprocessor. Most internationalized C
				24	source includes a #define for gettext() to _() so that what has to be written
				25	in the source is much less. Thus these are both translatable strings:
				26
				27	gettext("Translatable String")
				28	_("Translatable String")
				29
				30	Python of course has no preprocessor so this doesn't work so well. Thus,
				31	pygettext searches only for _() by default, but see the -k/--keyword flag
				32	below for how to augment this.
				33
				34	[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
				35	[2] http://www.gnu.org/software/gettext/gettext.html
				36
				37
				38	NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
				39	wherever possible.
				40
				41	Usage: pygettext [options] filename ...
				42
				43	Options:
				44
				45	-a
				46	--extract-all
				47	Extract all strings
				48
				49	-d default-domain
				50	--default-domain=default-domain
				51	Rename the default output file from messages.pot to default-domain.pot
				52
				53	-k [word]
				54	--keyword[=word]
				55	Additional keywords to look for. Without `word' means not to use the
				56	default keywords. The default keywords, which are always looked for
				57	if not explicitly disabled: _
				58
				59	The default keyword list is different than GNU xgettext. You can have
				60	multiple -k flags on the command line.
				61
				62	--no-location
				63	Do not write filename/lineno location comments
				64
				65	-n [style]
				66	--add-location[=style]
				67	Write filename/lineno location comments indicating where each
				68	extracted string is found in the source. These lines appear before
				69	each msgid. Two styles are supported:
				70
				71	Solaris # File: filename, line: line-number
				72	Gnu #: filename:line
				73
				74	If style is omitted, Gnu is used. The style name is case
				75	insensitive. By default, locations are included.
				76
				77	--help
				78	-h
				79	print this help message and exit
				80
				81	"""
				82
				83	import os
				84	import sys
				85	import string
				86	import time
				87	import getopt
				88	import tokenize
				89
# Version of this tool; written into the header comment of generated .pot files.
__version__ = '0.1'
				91
				92
				93
				94	def usage(code, msg=''):
				95	print __doc__ % globals()
				96	if msg:
				97	print msg
				98	sys.exit(code)
				99
				100
				101
				102	def normalize(s):
				103	# This converts the various Python string types into a format that is
				104	# appropriate for .po files, namely much closer to C style.
				105	#
				106	# unwrap quotes, safely
				107	s = eval(s, {'__builtins__':{}}, {})
				108	# now escape any embedded double quotes
				109	parts = []
				110	last = 0
				111	i = string.find(s, '"')
				112	while i >= 0:
				113	# find the number of preceding backslashes
				114	j = i
				115	n = 0
				116	while j >= 0 and s[i] == '\\':
				117	j = j - 1
				118	n = n + 1
				119	if (n % 2) == 0:
				120	parts.append(s[last:j])
				121	parts.append('\\')
				122	parts.append(s[j:i])
				123	else:
				124	parts.append(s[last:i])
				125	last = i
				126	i = string.find(s, '"', i+1)
				127	else:
				128	parts.append(s[last:])
				129	if parts:
				130	return '"' + string.join(parts, '') + '"'
				131	else:
				132	return '"' + s + '"'
				133
				134
				135
				136	class TokenEater:
				137	def __init__(self, options):
				138	self.__options = options
				139	self.__messages = {}
				140	self.__state = self.__waiting
				141	self.__data = []
				142	self.__lineno = -1
				143
				144	def __call__(self, ttype, tstring, stup, etup, line):
				145	# dispatch
				146	self.__state(ttype, tstring, stup[0])
				147
				148	def __waiting(self, ttype, tstring, lineno):
				149	if ttype == tokenize.NAME and tstring in self.__options.keywords:
				150	self.__state = self.__keywordseen
				151
				152	def __keywordseen(self, ttype, tstring, lineno):
				153	if ttype == tokenize.OP and tstring == '(':
				154	self.__data = []
				155	self.__lineno = lineno
				156	self.__state = self.__openseen
				157	else:
				158	self.__state = self.__waiting
				159
				160	def __openseen(self, ttype, tstring, lineno):
				161	if ttype == tokenize.OP and tstring == ')':
				162	# We've seen the last of the translatable strings. Record the
				163	# line number of the first line of the strings and update the list
				164	# of messages seen. Reset state for the next batch. If there
				165	# were no strings inside _(), then just ignore this entry.
				166	if self.__data:
				167	msg = string.join(self.__data, '')
				168	entry = (self.__curfile, self.__lineno)
				169	linenos = self.__messages.get(msg)
				170	if linenos is None:
				171	self.__messages[msg] = [entry]
				172	else:
				173	linenos.append(entry)
				174	self.__state = self.__waiting
				175	elif ttype == tokenize.STRING:
				176	self.__data.append(normalize(tstring))
				177	# TBD: should we warn if we seen anything else?
				178
				179	def set_filename(self, filename):
				180	self.__curfile = filename
				181
				182	def write(self, fp):
				183	options = self.__options
				184	timestamp = time.ctime(time.time())
				185	# common header
				186	try:
				187	sys.stdout = fp
				188	print '# POT file generated by pygettext.py', __version__
				189	print '#', timestamp
				190	print '#'
				191	for k, v in self.__messages.items():
				192	for filename, lineno in v:
				193	# location comments are different b/w Solaris and GNU
				194	if options.location == options.SOLARIS:
				195	print '# File: %s,' % filename, 'line: %d' % lineno
				196	elif options.location == options.GNU:
				197	print '#: %s:%d' % (filename, lineno)
				198	# TBD: sorting, normalizing
				199	print 'msgid', k
				200	print 'msgstr '
				201	print
				202	finally:
				203	sys.stdout = sys.__stdout__
				204
				205
				206	def main():
				207	default_keywords = ['_']
				208	try:
				209	opts, args = getopt.getopt(
				210	sys.argv[1:],
				211	'k:d:n:h',
				212	['keyword', 'default-domain', 'help',
				213	'add-location=', 'no-location'])
				214	except getopt.error, msg:
				215	usage(1, msg)
				216
				217	# for holding option values
				218	class Options:
				219	# constants
				220	GNU = 1
				221	SOLARIS = 2
				222	# defaults
				223	keywords = []
				224	outfile = 'messages.pot'
				225	location = GNU
				226
				227	options = Options()
				228	locations = {'gnu' : options.GNU,
				229	'solaris' : options.SOLARIS,
				230	}
				231
				232	# parse options
				233	for opt, arg in opts:
				234	if opt in ('-h', '--help'):
				235	usage(0)
				236	elif opt in ('-k', '--keyword'):
				237	if arg is None:
				238	default_keywords = []
				239	options.keywords.append(arg)
				240	elif opt in ('-d', '--default-domain'):
				241	options.outfile = arg + '.pot'
				242	elif opt in ('-n', '--add-location'):
				243	if arg is None:
				244	arg = 'gnu'
				245	try:
				246	options.location = locations[string.lower(arg)]
				247	except KeyError:
				248	usage(1, 'Invalid value for --add-location: ' + arg)
				249	elif opt in ('--no-location',):
				250	options.location = 0
				251
				252	# calculate all keywords
				253	options.keywords.extend(default_keywords)
				254
				255	# slurp through all the files
				256	eater = TokenEater(options)
				257	for filename in args:
				258	fp = open(filename)
				259	eater.set_filename(filename)
				260	tokenize.tokenize(fp.readline, eater)
				261	fp.close()
				262
				263	fp = open(options.outfile, 'w')
				264	eater.write(fp)
				265	fp.close()
				266
				267
				268
# Run the extractor only when executed as a script, not when imported.
if __name__ == '__main__':
    main()