blob: 4b742a8c42abb82903e7e1d71cb6aacbb7698fa4 [file] [log] [blame]
Tim Petersad147202000-10-05 03:48:38 +00001#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
5"""reindent [-d][-r][-v] path ...
6
7-d Dry run. Analyze, but don't make any changes to, files.
8-r Recurse. Search for all .py files in subdirectories too.
9-v Verbose. Print informative msgs; else no output.
10
11Change Python (.py) files to use 4-space indents and no hard tab characters.
12Also trim excess whitespace from ends of lines, and empty lines at the ends
13of files. Ensure the last line ends with a newline.
14
15Pass one or more file and/or directory paths. When a directory path, all
16.py files within the directory will be examined, and, if the -r option is
17given, likewise recursively for subdirectories.
18
19Overwrites files in place, renaming the originals with a .bak extension.
20If reindent finds nothing to change, the file is left alone. If reindent
21does change a file, the changed file is a fixed-point for reindent (i.e.,
22running reindent on the resulting .py file won't change it again).
23
24The hard part of reindenting is figuring out what to do with comment
25lines. So long as the input files get a clean bill of health from
26tabnanny.py, reindent should do a good job.
27"""
28
29__version__ = "1"
30
31import tokenize
32import os
33import sys
34
# Global option flags, bumped by command-line switches in main().
# Each is a count of how many times the switch appeared (truthiness is
# all that matters to the rest of the program).
verbose = 0   # -v: print informative messages while working
recurse = 0   # -r: descend into subdirectories of directory arguments
dryrun = 0    # -d: analyze files but never rewrite them
38
def errprint(*args):
    """Write all arguments to stderr, space-separated, ending in a newline."""
    text = " ".join(map(str, args))
    sys.stderr.write(text + "\n")
45
46def main():
47 import getopt
48 global verbose, recurse, dryrun
49 try:
50 opts, args = getopt.getopt(sys.argv[1:], "drv")
51 except getopt.error, msg:
52 errprint(msg)
53 return
54 for o, a in opts:
55 if o == '-d':
56 dryrun += 1
57 elif o == '-r':
58 recurse += 1
59 elif o == '-v':
60 verbose += 1
61 if not args:
62 errprint("Usage:", __doc__)
63 return
64 for arg in args:
65 check(arg)
66
67def check(file):
68 if os.path.isdir(file) and not os.path.islink(file):
69 if verbose:
70 print "listing directory", file
71 names = os.listdir(file)
72 for name in names:
73 fullname = os.path.join(file, name)
74 if ((recurse and os.path.isdir(fullname) and
75 not os.path.islink(fullname))
76 or name.lower().endswith(".py")):
77 check(fullname)
78 return
79
80 if verbose:
81 print "checking", file, "...",
82 try:
83 f = open(file)
84 except IOError, msg:
85 errprint("%s: I/O Error: %s" % (file, str(msg)))
86 return
87
88 r = Reindenter(f)
89 f.close()
90 if r.run():
91 if verbose:
92 print "changed."
93 if dryrun:
94 print "But this is a dry run, so leaving it alone."
95 if not dryrun:
96 bak = file + ".bak"
97 if os.path.exists(bak):
98 os.remove(bak)
99 os.rename(file, bak)
100 if verbose:
101 print "renamed", file, "to", bak
102 f = open(file, "w")
103 r.write(f)
104 f.close()
105 if verbose:
106 print "wrote new", file
107 else:
108 if verbose:
109 print "unchanged."
110
class Reindenter:
    """Compute a 4-space-indent version of a Python source file.

    Usage: construct with an open file (the whole file is read in the
    constructor), call run() to tokenize and compute the transformed
    text (returns true iff the text changed), then write(f) to emit
    the result.  Lines are also rstripped, tabs expanded, and trailing
    blank lines dropped; comment lines are re-anchored heuristically
    since tokenize reports no indent level for them.
    """

    def __init__(self, f):
        # Tokenizer bookkeeping used by tokeneater().
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines, exactly as read (used for the changed/unchanged
        # comparison in run()).
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [line.rstrip().expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Tokenize the input and build the reindented text in self.after.

        Returns true iff the transformed text differs from the raw input.
        """
        # tokenize drives tokeneater(), which fills self.stats with
        # (lineno, indentlevel) pairs for every stmt and comment line.
        tokenize.tokenize(self.getline, self.tokeneater)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in xrange(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in xrange(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    # An unindented comment line stays unindented.
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                # Nothing to shift (or nothing shiftable): copy as-is.
                after.extend(lines[thisstmt:nextstmt])
            else:
                # Shift every line of the stmt by diff spaces, but never
                # touch all-blank lines, and never strip below column 0.
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed text (from run()) to open file f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        # Returns "" at EOF, which is how tokenize detects the end.
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, (sline, scol), end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        # Records (start line, indent level) for each stmt, and
        # (start line, -1) for each comment line, into self.stats.

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
249
def getlspace(line):
    """Return the number of leading space characters in line.

    Only actual space characters count; a leading tab stops the count.
    """
    return len(line) - len(line.lstrip(" "))
256
# Script entry point: only run when executed directly, not when imported.
if __name__ == '__main__':
    main()