Blame - Tools/scripts/reindent.py - platform/external/python/cpython3

blob: 21c553b7f805f25fe87cfe03a1c909a88314d3e8 [file] [log] [blame]

Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	1	#! /usr/bin/env python
				2
				3	# Released to the public domain, by Tim Peters, 03 October 2000.
				4
				5	"""reindent [-d][-r][-v] path ...
				6
				7	-d Dry run. Analyze, but don't make any changes to, files.
				8	-r Recurse. Search for all .py files in subdirectories too.
				9	-v Verbose. Print informative msgs; else no output.
				10
				11	Change Python (.py) files to use 4-space indents and no hard tab characters.
Tim Peters	ba001a0	2001-10-04 19:44:10 +0000	[diff] [blame]	12	Also trim excess spaces and tabs from ends of lines, and remove empty lines
				13	at the end of files. Also ensure the last line ends with a newline.
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	14
				15	Pass one or more file and/or directory paths. When a directory path, all
				16	.py files within the directory will be examined, and, if the -r option is
				17	given, likewise recursively for subdirectories.
				18
				19	Overwrites files in place, renaming the originals with a .bak extension.
				20	If reindent finds nothing to change, the file is left alone. If reindent
				21	does change a file, the changed file is a fixed-point for reindent (i.e.,
				22	running reindent on the resulting .py file won't change it again).
				23
				24	The hard part of reindenting is figuring out what to do with comment
				25	lines. So long as the input files get a clean bill of health from
				26	tabnanny.py, reindent should do a good job.
				27	"""
				28
				29	__version__ = "1"
				30
				31	import tokenize
				32	import os
				33	import sys
				34
# Command-line option counters.  main() increments one each time the
# matching flag appears; check() only tests them for truth.
verbose = 0  # -v: print progress messages
recurse = 0  # -r: descend into subdirectories
dryrun = 0  # -d: analyze files but do not rewrite them
				38
				39	def errprint(*args):
				40	sep = ""
				41	for arg in args:
				42	sys.stderr.write(sep + str(arg))
				43	sep = " "
				44	sys.stderr.write("\n")
				45
				46	def main():
				47	import getopt
				48	global verbose, recurse, dryrun
				49	try:
				50	opts, args = getopt.getopt(sys.argv[1:], "drv")
				51	except getopt.error, msg:
				52	errprint(msg)
				53	return
				54	for o, a in opts:
				55	if o == '-d':
				56	dryrun += 1
				57	elif o == '-r':
				58	recurse += 1
				59	elif o == '-v':
				60	verbose += 1
				61	if not args:
				62	errprint("Usage:", __doc__)
				63	return
				64	for arg in args:
				65	check(arg)
				66
				67	def check(file):
				68	if os.path.isdir(file) and not os.path.islink(file):
				69	if verbose:
				70	print "listing directory", file
				71	names = os.listdir(file)
				72	for name in names:
				73	fullname = os.path.join(file, name)
				74	if ((recurse and os.path.isdir(fullname) and
				75	not os.path.islink(fullname))
				76	or name.lower().endswith(".py")):
				77	check(fullname)
				78	return
				79
				80	if verbose:
				81	print "checking", file, "...",
				82	try:
				83	f = open(file)
				84	except IOError, msg:
				85	errprint("%s: I/O Error: %s" % (file, str(msg)))
				86	return
				87
				88	r = Reindenter(f)
				89	f.close()
				90	if r.run():
				91	if verbose:
				92	print "changed."
				93	if dryrun:
				94	print "But this is a dry run, so leaving it alone."
				95	if not dryrun:
				96	bak = file + ".bak"
				97	if os.path.exists(bak):
				98	os.remove(bak)
				99	os.rename(file, bak)
				100	if verbose:
				101	print "renamed", file, "to", bak
				102	f = open(file, "w")
				103	r.write(f)
				104	f.close()
				105	if verbose:
				106	print "wrote new", file
				107	else:
				108	if verbose:
				109	print "unchanged."
				110
Tim Peters	ba001a0	2001-10-04 19:44:10 +0000	[diff] [blame]	111	def _rstrip(line, JUNK='\n \t'):
				112	"""Return line stripped of trailing spaces, tabs, newlines.
				113
				114	Note that line.rstrip() instead also strips sundry control characters,
				115	but at least one known Emacs user expects to keep junk like that, not
				116	mentioning Barry by name or anything <wink>.
				117	"""
				118
				119	i = len(line)
				120	while i > 0 and line[i-1] in JUNK:
				121	i -= 1
				122	return line[:i]
				123
class Reindenter:
    """Compute a 4-space-indented version of the lines read from a file.

    Construct with an open file object, call run() to do the analysis
    (it returns true if the result differs from the input), then call
    write() to emit the transformed lines.
    """

    def __init__(self, f):
        """Read every line of *f* and prepare tokenizer bookkeeping."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Tokenize the input and build self.after, the reindented lines.

        Returns true if self.after differs from the raw input lines.
        Errors raised by the tokenizer are not caught here.
        """
        # generate_tokens() exists on both Python 2 and Python 3; the
        # callback form tokenize.tokenize(readline, eater) is Python 2 only.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with something on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                # after[] is 0-based while lines[] is
                                # 1-based, hence the jline-1.
                                want = have + getlspace(after[jline - 1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never strip more than the leading blanks the
                        # line actually has.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines built by run() to *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next normalized line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        *slinecol* is the token's (row, column) start position.  Appends
        (row, level) to self.stats for the first token of each statement
        and (row, -1) for each comment seen while a statement start is
        still being looked for.
        """
        # Unpacked here because tuple parameters in the signature are not
        # valid Python 3 syntax.
        sline, _scol = slinecol

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
				266
				267	# Count number of leading blanks.
				268	def getlspace(line):
				269	i, n = 0, len(line)
				270	while i < n and line[i] == " ":
				271	i += 1
				272	return i
				273
# Run the command-line interface only when executed as a script, not
# when this module is imported.
if __name__ == '__main__':
    main()