blob: e646aed9c3ce08238bfc963128253f88f0cf88cc [file] [log] [blame]
Tim Petersad147202000-10-05 03:48:38 +00001#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
5"""reindent [-d][-r][-v] path ...
6
7-d Dry run. Analyze, but don't make any changes to, files.
8-r Recurse. Search for all .py files in subdirectories too.
9-v Verbose. Print informative msgs; else no output.
10
11Change Python (.py) files to use 4-space indents and no hard tab characters.
Tim Petersba001a02001-10-04 19:44:10 +000012Also trim excess spaces and tabs from ends of lines, and remove empty lines
13at the end of files. Also ensure the last line ends with a newline.
Tim Petersad147202000-10-05 03:48:38 +000014
15Pass one or more file and/or directory paths. When a directory path, all
16.py files within the directory will be examined, and, if the -r option is
17given, likewise recursively for subdirectories.
18
19Overwrites files in place, renaming the originals with a .bak extension.
20If reindent finds nothing to change, the file is left alone. If reindent
21does change a file, the changed file is a fixed-point for reindent (i.e.,
22running reindent on the resulting .py file won't change it again).
23
24The hard part of reindenting is figuring out what to do with comment
25lines. So long as the input files get a clean bill of health from
26tabnanny.py, reindent should do a good job.
27"""
28
__version__ = "1"

import tokenize
import os
import sys

# Command-line option flags; each is bumped in main() when the matching
# switch is seen (-v / -r / -d).  Zero means "off".
verbose = 0   # -v: print progress messages (otherwise silent)
recurse = 0   # -r: descend into subdirectories
dryrun = 0    # -d: analyze only, never rewrite files
38
def errprint(*args):
    """Write args to stderr, space-separated, followed by a newline."""
    sys.stderr.write(" ".join(str(arg) for arg in args))
    sys.stderr.write("\n")
45
def main():
    """Parse command-line options and reindent each path argument.

    The -d/-r/-v switches bump the corresponding module-level counters
    (dryrun/recurse/verbose); every remaining argument is handed to
    check().  Usage errors are reported via errprint() and cause an
    early return rather than an exception.
    """
    import getopt
    global verbose, recurse, dryrun
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drv")
    # Python 2-only "except getopt.error, msg" fixed to the modern
    # "as" form, which is required by Python 3.
    except getopt.error as msg:
        errprint(msg)
        return
    for o, a in opts:
        if o == '-d':
            dryrun += 1
        elif o == '-r':
            recurse += 1
        elif o == '-v':
            verbose += 1
    if not args:
        errprint("Usage:", __doc__)
        return
    for arg in args:
        check(arg)
66
def check(file):
    """Reindent the .py file at path `file`, or scan a directory.

    For a directory (symlinks excluded), examine every .py file inside
    it, recursing into subdirectories when the -r flag was given.  For a
    file, run Reindenter over it; if anything changed, rename the
    original to file + ".bak" and rewrite `file` in place (unless -d
    dry-run is in effect).  Messages are printed only under -v.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # end=' ' preserves the old trailing-comma print behavior:
        # "checked ..." and the verdict share one output line.
        print("checking", file, "...", end=' ')
    try:
        f = open(file)
    # Python 2-only "except IOError, msg" fixed to the "as" form.
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter slurps the whole file in its constructor; the context
    # manager guarantees the handle is closed even if that raises.
    with f:
        r = Reindenter(f)
    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if os.path.exists(bak):
                os.remove(bak)
            os.rename(file, bak)
            if verbose:
                print("renamed", file, "to", bak)
            with open(file, "w") as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
    else:
        if verbose:
            print("unchanged.")
110
Tim Petersba001a02001-10-04 19:44:10 +0000111def _rstrip(line, JUNK='\n \t'):
112 """Return line stripped of trailing spaces, tabs, newlines.
113
114 Note that line.rstrip() instead also strips sundry control characters,
115 but at least one known Emacs user expects to keep junk like that, not
116 mentioning Barry by name or anything <wink>.
117 """
118
119 i = len(line)
120 while i > 0 and line[i-1] in JUNK:
121 i -= 1
122 return line[:i]
123
class Reindenter:
    """Reindent a Python source file to 4-space indentation levels.

    Usage:
        r = Reindenter(f)   # slurps the open file f
        if r.run():         # true iff a change was needed
            r.write(out)    # emit the reindented program
    """

    def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build the reindented program in self.after.

        Returns true iff the result differs from the original lines.
        """
        # The Python 2 callback form tokenize.tokenize(readline,
        # tokeneater) no longer exists; iterate generate_tokens and
        # feed each 5-tuple to tokeneater instead.
        for _token in tokenize.generate_tokens(self.getline):
            self.tokeneater(*_token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:  # Maybe it's a hanging
                                  # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline - 1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the reindented program to open file f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.  The Python 2 tuple parameter
    # "(sline, scol)" is a syntax error in Python 3, so the start
    # position arrives as the single tuple slinecol.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
262
def getlspace(line):
    """Return the number of leading space characters in line."""
    width = 0
    for ch in line:
        if ch != " ":
            break
        width += 1
    return width
269
# Script entry point: reindent the paths named on the command line.
if __name__ == '__main__':
    main()