Blame - utils/reindent.py - platform/external/autotest

blob: 65749f0be53ab86b1fcbfa8d2940fd84062db2d2 [file] [log] [blame]

mbligh	09a025e	2008-06-06 20:29:49 +0000	[diff] [blame]	1	#! /usr/bin/env python
				2
				3	# Released to the public domain, by Tim Peters, 03 October 2000.
				4
				5	"""reindent [-d][-r][-v] [ path ... ]
				6
				7	-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
				8	-r (--recurse) Recurse. Search for all .py files in subdirectories too.
				9	-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
				10	-v (--verbose) Verbose. Print informative msgs; else no output.
				11	-h (--help) Help. Print this usage information and exit.
				12
				13	Change Python (.py) files to use 4-space indents and no hard tab characters.
				14	Also trim excess spaces and tabs from ends of lines, and remove empty lines
				15	at the end of files. Also ensure the last line ends with a newline.
				16
				17	If no paths are given on the command line, reindent operates as a filter,
				18	reading a single source file from standard input and writing the transformed
				19	source to standard output. In this case, the -d, -r and -v flags are
				20	ignored.
				21
				22	You can pass one or more file and/or directory paths. When a directory
				23	path, all .py files within the directory will be examined, and, if the -r
				24	option is given, likewise recursively for subdirectories.
				25
				26	If output is not to standard output, reindent overwrites files in place,
				27	renaming the originals with a .bak extension. If it finds nothing to
				28	change, the file is left alone. If reindent does change a file, the changed
				29	file is a fixed-point for future runs (i.e., running reindent on the
				30	resulting .py file won't change it again).
				31
				32	The hard part of reindenting is figuring out what to do with comment
				33	lines. So long as the input files get a clean bill of health from
				34	tabnanny.py, reindent should do a good job.
				35
				36	The backup file is a copy of the one that is being reindented. The ".bak"
				37	file is generated with shutil.copy(), but some corner cases regarding
				38	user/group and permissions could leave the backup file more readable than
				39	you'd prefer. You can always use the --nobackup option to prevent this.
				40	"""
				41
				42	__version__ = "1"
				43
				44	import tokenize
				45	import os, shutil
				46	import sys
				47
# Command-line option state.  main() updates these from the parsed flags
# and check() reads them.
verbose = 0        # bumped once per -v/--verbose; nonzero enables progress output
recurse = 0        # bumped once per -r/--recurse; nonzero descends into subdirectories
dryrun = 0         # bumped once per -d/--dryrun; nonzero means files are never rewritten
makebackup = True  # set to False by -n/--nobackup; when true a ".bak" copy is made first
				52
				53	def usage(msg=None):
				54	if msg is not None:
				55	print >> sys.stderr, msg
				56	print >> sys.stderr, __doc__
				57
				58	def errprint(*args):
				59	sep = ""
				60	for arg in args:
				61	sys.stderr.write(sep + str(arg))
				62	sep = " "
				63	sys.stderr.write("\n")
				64
				65	def main():
				66	import getopt
				67	global verbose, recurse, dryrun, makebackup
				68	try:
				69	opts, args = getopt.getopt(sys.argv[1:], "drnvh",
				70	["dryrun", "recurse", "nobackup", "verbose", "help"])
				71	except getopt.error, msg:
				72	usage(msg)
				73	return
				74	for o, a in opts:
				75	if o in ('-d', '--dryrun'):
				76	dryrun += 1
				77	elif o in ('-r', '--recurse'):
				78	recurse += 1
				79	elif o in ('-n', '--nobackup'):
				80	makebackup = False
				81	elif o in ('-v', '--verbose'):
				82	verbose += 1
				83	elif o in ('-h', '--help'):
				84	usage()
				85	return
				86	if not args:
				87	r = Reindenter(sys.stdin)
				88	r.run()
				89	r.write(sys.stdout)
				90	return
				91	for arg in args:
				92	check(arg)
				93
				94	def check(file):
				95	if os.path.isdir(file) and not os.path.islink(file):
				96	if verbose:
				97	print "listing directory", file
				98	names = os.listdir(file)
				99	for name in names:
				100	fullname = os.path.join(file, name)
				101	if ((recurse and os.path.isdir(fullname) and
				102	not os.path.islink(fullname))
				103	or name.lower().endswith(".py")):
				104	check(fullname)
				105	return
				106
				107	if verbose:
				108	print "checking", file, "...",
				109	try:
				110	f = open(file)
				111	except IOError, msg:
				112	errprint("%s: I/O Error: %s" % (file, str(msg)))
				113	return
				114
				115	r = Reindenter(f)
				116	f.close()
				117	if r.run():
				118	if verbose:
				119	print "changed."
				120	if dryrun:
				121	print "But this is a dry run, so leaving it alone."
				122	if not dryrun:
				123	bak = file + ".bak"
				124	if makebackup:
				125	shutil.copyfile(file, bak)
				126	if verbose:
				127	print "backed up", file, "to", bak
				128	f = open(file, "w")
				129	r.write(f)
				130	f.close()
				131	if verbose:
				132	print "wrote new", file
				133	return True
				134	else:
				135	if verbose:
				136	print "unchanged."
				137	return False
				138
				139	def _rstrip(line, JUNK='\n \t'):
				140	"""Return line stripped of trailing spaces, tabs, newlines.
				141
				142	Note that line.rstrip() instead also strips sundry control characters,
				143	but at least one known Emacs user expects to keep junk like that, not
				144	mentioning Barry by name or anything <wink>.
				145	"""
				146
				147	i = len(line)
				148	while i > 0 and line[i-1] in JUNK:
				149	i -= 1
				150	return line[:i]
				151
				152	class Reindenter:
				153
				154	def __init__(self, f):
				155	self.find_stmt = 1 # next token begins a fresh stmt?
				156	self.level = 0 # current indent level
				157
				158	# Raw file lines.
				159	self.raw = f.readlines()
				160
				161	# File lines, rstripped & tab-expanded. Dummy at start is so
				162	# that we can use tokenize's 1-based line numbering easily.
				163	# Note that a line is all-blank iff it's "\n".
				164	self.lines = [_rstrip(line).expandtabs() + "\n"
				165	for line in self.raw]
				166	self.lines.insert(0, None)
				167	self.index = 1 # index into self.lines of next line
				168
				169	# List of (lineno, indentlevel) pairs, one for each stmt and
				170	# comment line. indentlevel is -1 for comment lines, as a
				171	# signal that tokenize doesn't know what to do about them;
				172	# indeed, they're our headache!
				173	self.stats = []
				174
				175	def run(self):
				176	tokenize.tokenize(self.getline, self.tokeneater)
				177	# Remove trailing empty lines.
				178	lines = self.lines
				179	while lines and lines[-1] == "\n":
				180	lines.pop()
				181	# Sentinel.
				182	stats = self.stats
				183	stats.append((len(lines), 0))
				184	# Map count of leading spaces to # we want.
				185	have2want = {}
				186	# Program after transformation.
				187	after = self.after = []
				188	# Copy over initial empty lines -- there's nothing to do until
				189	# we see a line with something on it.
				190	i = stats[0][0]
				191	after.extend(lines[1:i])
				192	for i in range(len(stats)-1):
				193	thisstmt, thislevel = stats[i]
				194	nextstmt = stats[i+1][0]
				195	have = getlspace(lines[thisstmt])
				196	want = thislevel * 4
				197	if want < 0:
				198	# A comment line.
				199	if have:
				200	# An indented comment line. If we saw the same
				201	# indentation before, reuse what it most recently
				202	# mapped to.
				203	want = have2want.get(have, -1)
				204	if want < 0:
				205	# Then it probably belongs to the next real stmt.
				206	for j in xrange(i+1, len(stats)-1):
				207	jline, jlevel = stats[j]
				208	if jlevel >= 0:
				209	if have == getlspace(lines[jline]):
				210	want = jlevel * 4
				211	break
				212	if want < 0: # Maybe it's a hanging
				213	# comment like this one,
				214	# in which case we should shift it like its base
				215	# line got shifted.
				216	for j in xrange(i-1, -1, -1):
				217	jline, jlevel = stats[j]
				218	if jlevel >= 0:
				219	want = have + getlspace(after[jline-1]) - \
				220	getlspace(lines[jline])
				221	break
				222	if want < 0:
				223	# Still no luck -- leave it alone.
				224	want = have
				225	else:
				226	want = 0
				227	assert want >= 0
				228	have2want[have] = want
				229	diff = want - have
				230	if diff == 0 or have == 0:
				231	after.extend(lines[thisstmt:nextstmt])
				232	else:
				233	for line in lines[thisstmt:nextstmt]:
				234	if diff > 0:
				235	if line == "\n":
				236	after.append(line)
				237	else:
				238	after.append(" " * diff + line)
				239	else:
				240	remove = min(getlspace(line), -diff)
				241	after.append(line[remove:])
				242	return self.raw != self.after
				243
				244	def write(self, f):
				245	f.writelines(self.after)
				246
				247	# Line-getter for tokenize.
				248	def getline(self):
				249	if self.index >= len(self.lines):
				250	line = ""
				251	else:
				252	line = self.lines[self.index]
				253	self.index += 1
				254	return line
				255
				256	# Line-eater for tokenize.
				257	def tokeneater(self, type, token, (sline, scol), end, line,
				258	INDENT=tokenize.INDENT,
				259	DEDENT=tokenize.DEDENT,
				260	NEWLINE=tokenize.NEWLINE,
				261	COMMENT=tokenize.COMMENT,
				262	NL=tokenize.NL):
				263
				264	if type == NEWLINE:
				265	# A program statement, or ENDMARKER, will eventually follow,
				266	# after some (possibly empty) run of tokens of the form
				267	# (NL \| COMMENT)* (INDENT \| DEDENT+)?
				268	self.find_stmt = 1
				269
				270	elif type == INDENT:
				271	self.find_stmt = 1
				272	self.level += 1
				273
				274	elif type == DEDENT:
				275	self.find_stmt = 1
				276	self.level -= 1
				277
				278	elif type == COMMENT:
				279	if self.find_stmt:
				280	self.stats.append((sline, -1))
				281	# but we're still looking for a new stmt, so leave
				282	# find_stmt alone
				283
				284	elif type == NL:
				285	pass
				286
				287	elif self.find_stmt:
				288	# This is the first "real token" following a NEWLINE, so it
				289	# must be the first token of the next program statement, or an
				290	# ENDMARKER.
				291	self.find_stmt = 0
				292	if line: # not endmarker
				293	self.stats.append((sline, self.level))
				294
				295	# Count number of leading blanks.
				296	def getlspace(line):
				297	i, n = 0, len(line)
				298	while i < n and line[i] == " ":
				299	i += 1
				300	return i
				301
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()