Blame - Tools/scripts/reindent.py - platform/external/python/cpython3

blob: 13150f9eef1705287841daa18f09c990c5530cc6 [file] [log] [blame]

Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	1	#! /usr/bin/env python
				2
				3	# Released to the public domain, by Tim Peters, 03 October 2000.
				4
Skip Montanaro	9a29e7a	2002-03-26 11:39:26 +0000	[diff] [blame]	5	"""reindent [-d][-r][-v] [ path ... ]
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	6
				7	-d Dry run. Analyze, but don't make any changes to, files.
				8	-r Recurse. Search for all .py files in subdirectories too.
				9	-v Verbose. Print informative msgs; else no output.
				10
				11	Change Python (.py) files to use 4-space indents and no hard tab characters.
Tim Peters	ba001a0	2001-10-04 19:44:10 +0000	[diff] [blame]	12	Also trim excess spaces and tabs from ends of lines, and remove empty lines
				13	at the end of files. Also ensure the last line ends with a newline.
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	14
Skip Montanaro	9a29e7a	2002-03-26 11:39:26 +0000	[diff] [blame]	15	If no paths are given on the command line, reindent operates as a filter,
				16	reading a single source file from standard input and writing the transformed
				17	source to standard output. In this case, the -d, -r and -v flags are
				18	ignored.
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	19
Skip Montanaro	9a29e7a	2002-03-26 11:39:26 +0000	[diff] [blame]	20	You can pass one or more file and/or directory paths. When a directory
				21	path, all .py files within the directory will be examined, and, if the -r
				22	option is given, likewise recursively for subdirectories.
				23
				24	If output is not to standard output, reindent overwrites files in place,
				25	renaming the originals with a .bak extension. If it finds nothing to
				26	change, the file is left alone. If reindent does change a file, the changed
				27	file is a fixed-point for future runs (i.e., running reindent on the
				28	resulting .py file won't change it again).
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	29
				30	The hard part of reindenting is figuring out what to do with comment
				31	lines. So long as the input files get a clean bill of health from
				32	tabnanny.py, reindent should do a good job.
				33	"""
				34
__version__ = "1"

import tokenize
import os
import sys

# Option counters.  main() increments one of these for each -v, -r or -d
# seen on the command line; check() reads them as plain truth values.
verbose = 0
recurse = 0
dryrun = 0
				44
				45	def errprint(*args):
				46	sep = ""
				47	for arg in args:
				48	sys.stderr.write(sep + str(arg))
				49	sep = " "
				50	sys.stderr.write("\n")
				51
				52	def main():
				53	import getopt
				54	global verbose, recurse, dryrun
				55	try:
				56	opts, args = getopt.getopt(sys.argv[1:], "drv")
				57	except getopt.error, msg:
				58	errprint(msg)
				59	return
				60	for o, a in opts:
				61	if o == '-d':
				62	dryrun += 1
				63	elif o == '-r':
				64	recurse += 1
				65	elif o == '-v':
				66	verbose += 1
				67	if not args:
Skip Montanaro	9a29e7a	2002-03-26 11:39:26 +0000	[diff] [blame]	68	r = Reindenter(sys.stdin)
				69	r.run()
				70	r.write(sys.stdout)
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	71	return
				72	for arg in args:
				73	check(arg)
				74
				75	def check(file):
				76	if os.path.isdir(file) and not os.path.islink(file):
				77	if verbose:
				78	print "listing directory", file
				79	names = os.listdir(file)
				80	for name in names:
				81	fullname = os.path.join(file, name)
				82	if ((recurse and os.path.isdir(fullname) and
				83	not os.path.islink(fullname))
				84	or name.lower().endswith(".py")):
				85	check(fullname)
				86	return
				87
				88	if verbose:
				89	print "checking", file, "...",
				90	try:
				91	f = open(file)
				92	except IOError, msg:
				93	errprint("%s: I/O Error: %s" % (file, str(msg)))
				94	return
				95
				96	r = Reindenter(f)
				97	f.close()
				98	if r.run():
				99	if verbose:
				100	print "changed."
				101	if dryrun:
				102	print "But this is a dry run, so leaving it alone."
				103	if not dryrun:
				104	bak = file + ".bak"
				105	if os.path.exists(bak):
				106	os.remove(bak)
				107	os.rename(file, bak)
				108	if verbose:
				109	print "renamed", file, "to", bak
				110	f = open(file, "w")
				111	r.write(f)
				112	f.close()
				113	if verbose:
				114	print "wrote new", file
				115	else:
				116	if verbose:
				117	print "unchanged."
				118
Tim Peters	ba001a0	2001-10-04 19:44:10 +0000	[diff] [blame]	119	def _rstrip(line, JUNK='\n \t'):
				120	"""Return line stripped of trailing spaces, tabs, newlines.
				121
				122	Note that line.rstrip() instead also strips sundry control characters,
				123	but at least one known Emacs user expects to keep junk like that, not
				124	mentioning Barry by name or anything <wink>.
				125	"""
				126
				127	i = len(line)
				128	while i > 0 and line[i-1] in JUNK:
				129	i -= 1
				130	return line[:i]
				131
class Reindenter:
    """Compute a 4-space-indented version of one Python source file.

    Construct it from a file-like object, call run() to build the
    transformed text, then write() to emit it.
    """

    def __init__(self, f):
        """Read every line of `f` and set up the tokenizing state.

        `f` only needs a readlines() method.
        """
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build self.after, the reindented program, as a list of lines.

        Returns true if the result differs from the raw input lines.
        """
        # Feed every token to tokeneater().  generate_tokens() is available
        # on both Python 2 and 3, unlike the callback form of
        # tokenize.tokenize().
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with something on it.
        after.extend(lines[1:stats[0][0]])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            if thislevel < 0:
                # A comment line.
                want = self._comment_indent(i, have, have2want)
            else:
                want = thislevel * 4
            assert want >= 0
            have2want[have] = want
            diff = want - have
            chunk = lines[thisstmt:nextstmt]
            if diff == 0 or have == 0:
                after.extend(chunk)
            elif diff > 0:
                # Shift right, but keep all-blank lines blank.
                pad = " " * diff
                for line in chunk:
                    if line == "\n":
                        after.append(line)
                    else:
                        after.append(pad + line)
            else:
                # Shift left, never removing more than the leading blanks
                # a line actually has.
                for line in chunk:
                    remove = min(getlspace(line), -diff)
                    after.append(line[remove:])
        return self.raw != self.after

    def _comment_indent(self, i, have, have2want):
        """Return the indent wanted for the comment line at self.stats[i].

        `have` is the comment's current count of leading spaces and
        `have2want` maps indents seen so far to what they were changed to.
        """
        if not have:
            return 0
        stats = self.stats
        lines = self.lines
        # An indented comment line.  If we saw the same indentation
        # before, reuse what it most recently mapped to.
        want = have2want.get(have, -1)
        if want < 0:
            # Then it probably belongs to the next real stmt.
            for j in range(i + 1, len(stats) - 1):
                jline, jlevel = stats[j]
                if jlevel >= 0:
                    if have == getlspace(lines[jline]):
                        want = jlevel * 4
                    break
        if want < 0:
            # Maybe it's a hanging comment, in which case we should
            # shift it like its base line got shifted.
            for j in range(i - 1, -1, -1):
                jline, jlevel = stats[j]
                if jlevel >= 0:
                    want = (have + getlspace(self.after[jline - 1]) -
                            getlspace(lines[jline]))
                    break
        if want < 0:
            # Still no luck -- leave it alone.
            want = have
        return want

    def write(self, f):
        """Write the transformed lines built by run() to `f`."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once they are used up."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Record the line number and indent level of each stmt/comment.

        Takes the five fields of one token as produced by tokenize;
        `slinecol` is the (row, column) pair where the token starts.
        """
        sline, _scol = slinecol

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
				274
				275	# Count number of leading blanks.
				276	def getlspace(line):
				277	i, n = 0, len(line)
				278	while i < n and line[i] == " ":
				279	i += 1
				280	return i
				281
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()