Blame - Tools/scripts/reindent.py - platform/external/python/cpython3

blob: 5ac98c7f7bef2249b4aad432d7941da3f0435eb0 [file] [log] [blame]

Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	1	#! /usr/bin/env python
				2
				3	# Released to the public domain, by Tim Peters, 03 October 2000.
				4
Skip Montanaro	9a29e7a	2002-03-26 11:39:26 +0000	[diff] [blame]	5	"""reindent [-d][-r][-v] [ path ... ]
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	6
Skip Montanaro	165163f	2004-03-27 18:43:56 +0000	[diff] [blame]	7	-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
				8	-r (--recurse) Recurse. Search for all .py files in subdirectories too.
				9	-v (--verbose) Verbose. Print informative msgs; else no output.
				10	-h (--help) Help. Print this usage information and exit.
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	11
				12	Change Python (.py) files to use 4-space indents and no hard tab characters.
Tim Peters	ba001a0	2001-10-04 19:44:10 +0000	[diff] [blame]	13	Also trim excess spaces and tabs from ends of lines, and remove empty lines
				14	at the end of files. Also ensure the last line ends with a newline.
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	15
Skip Montanaro	9a29e7a	2002-03-26 11:39:26 +0000	[diff] [blame]	16	If no paths are given on the command line, reindent operates as a filter,
				17	reading a single source file from standard input and writing the transformed
				18	source to standard output. In this case, the -d, -r and -v flags are
				19	ignored.
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	20
Skip Montanaro	9a29e7a	2002-03-26 11:39:26 +0000	[diff] [blame]	21	You can pass one or more file and/or directory paths. When a directory
				22	path, all .py files within the directory will be examined, and, if the -r
				23	option is given, likewise recursively for subdirectories.
				24
				25	If output is not to standard output, reindent overwrites files in place,
				26	renaming the originals with a .bak extension. If it finds nothing to
				27	change, the file is left alone. If reindent does change a file, the changed
				28	file is a fixed-point for future runs (i.e., running reindent on the
				29	resulting .py file won't change it again).
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	30
				31	The hard part of reindenting is figuring out what to do with comment
				32	lines. So long as the input files get a clean bill of health from
				33	tabnanny.py, reindent should do a good job.
				34	"""
				35
				36	__version__ = "1"
				37
				38	import tokenize
				39	import os
				40	import sys
				41
# Command-line option counters. main() increments one each time the matching
# flag appears on the command line; check() only tests them for truth.
verbose = 0  # -v / --verbose: print progress messages
recurse = 0  # -r / --recurse: also descend into subdirectories
dryrun = 0   # -d / --dryrun: analyze files but never rewrite them
				45
def usage(msg=None):
    """Write an optional message, then the module usage text, to stderr.

    msg -- any object convertible with str() (main() passes the getopt
           error here).  When omitted or None, only the usage text (the
           module docstring) is written.
    """
    # Plain stream writes produce the same bytes as the old
    # "print >> sys.stderr, x" statements, but keep working on Python 3,
    # where that spelling parses as a shift expression and raises TypeError.
    if msg is not None:
        sys.stderr.write(str(msg) + "\n")
    sys.stderr.write(str(__doc__) + "\n")
				50
def errprint(*args):
    """Write all arguments to stderr on a single line.

    Each argument is converted with str(); the pieces are separated by one
    space and the line is terminated with a newline.  With no arguments
    just the newline is written.
    """
    pieces = [str(item) for item in args]
    sys.stderr.write(" ".join(pieces) + "\n")
				57
				58	def main():
				59	import getopt
				60	global verbose, recurse, dryrun
				61	try:
Skip Montanaro	165163f	2004-03-27 18:43:56 +0000	[diff] [blame]	62	opts, args = getopt.getopt(sys.argv[1:], "drvh",
				63	["dryrun", "recurse", "verbose", "help"])
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	64	except getopt.error, msg:
Skip Montanaro	165163f	2004-03-27 18:43:56 +0000	[diff] [blame]	65	usage(msg)
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	66	return
				67	for o, a in opts:
Skip Montanaro	165163f	2004-03-27 18:43:56 +0000	[diff] [blame]	68	if o in ('-d', '--dryrun'):
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	69	dryrun += 1
Skip Montanaro	165163f	2004-03-27 18:43:56 +0000	[diff] [blame]	70	elif o in ('-r', '--recurse'):
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	71	recurse += 1
Skip Montanaro	165163f	2004-03-27 18:43:56 +0000	[diff] [blame]	72	elif o in ('-v', '--verbose'):
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	73	verbose += 1
Skip Montanaro	165163f	2004-03-27 18:43:56 +0000	[diff] [blame]	74	elif o in ('-h', '--help'):
				75	usage()
				76	return
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	77	if not args:
Skip Montanaro	9a29e7a	2002-03-26 11:39:26 +0000	[diff] [blame]	78	r = Reindenter(sys.stdin)
				79	r.run()
				80	r.write(sys.stdout)
Tim Peters	ad14720	2000-10-05 03:48:38 +0000	[diff] [blame]	81	return
				82	for arg in args:
				83	check(arg)
				84
				85	def check(file):
				86	if os.path.isdir(file) and not os.path.islink(file):
				87	if verbose:
				88	print "listing directory", file
				89	names = os.listdir(file)
				90	for name in names:
				91	fullname = os.path.join(file, name)
				92	if ((recurse and os.path.isdir(fullname) and
				93	not os.path.islink(fullname))
				94	or name.lower().endswith(".py")):
				95	check(fullname)
				96	return
				97
				98	if verbose:
				99	print "checking", file, "...",
				100	try:
				101	f = open(file)
				102	except IOError, msg:
				103	errprint("%s: I/O Error: %s" % (file, str(msg)))
				104	return
				105
				106	r = Reindenter(f)
				107	f.close()
				108	if r.run():
				109	if verbose:
				110	print "changed."
				111	if dryrun:
				112	print "But this is a dry run, so leaving it alone."
				113	if not dryrun:
				114	bak = file + ".bak"
				115	if os.path.exists(bak):
				116	os.remove(bak)
				117	os.rename(file, bak)
				118	if verbose:
				119	print "renamed", file, "to", bak
				120	f = open(file, "w")
				121	r.write(f)
				122	f.close()
				123	if verbose:
				124	print "wrote new", file
				125	else:
				126	if verbose:
				127	print "unchanged."
				128
Tim Peters	ba001a0	2001-10-04 19:44:10 +0000	[diff] [blame]	129	def _rstrip(line, JUNK='\n \t'):
				130	"""Return line stripped of trailing spaces, tabs, newlines.
				131
				132	Note that line.rstrip() instead also strips sundry control characters,
				133	but at least one known Emacs user expects to keep junk like that, not
				134	mentioning Barry by name or anything <wink>.
				135	"""
				136
				137	i = len(line)
				138	while i > 0 and line[i-1] in JUNK:
				139	i -= 1
				140	return line[:i]
				141
class Reindenter:
    """Re-indent one Python source file to 4 spaces per indentation level.

    Usage: Reindenter(f), then run(), then write(out).

    Attributes:
      raw       -- the lines exactly as read from the input file
      lines     -- the same lines, right-stripped and tab-expanded, with a
                   dummy None at index 0 so that tokenize's 1-based line
                   numbers can be used as indices
      index     -- index into lines of the next line getline() returns
      stats     -- (lineno, indentlevel) pairs recorded by tokeneater()
      after     -- the reindented lines; created by run()
      find_stmt -- true while the next "real" token begins a statement
      level     -- current indent level, driven by INDENT/DEDENT tokens
    """

    def __init__(self, f):
        """Read every line from the file-like object f and normalize it."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented program into self.after.

        Returns a true value when the result differs from the raw input
        lines, i.e. when the file would need rewriting.
        """
        # generate_tokens() exists on both Python 2 and Python 3; the
        # callback form tokenize.tokenize(readline, eater) is Python 2 only.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with something on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                # Only the first real stmt is consulted.
                                break
                    if want < 0:
                        # Maybe it's a hanging comment, in which case we
                        # should shift it like its base line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                # after has no dummy entry, hence jline-1.
                                want = (have + getlspace(after[jline - 1])
                                        - getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Never add trailing blanks to an empty line.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Don't eat more leading blanks than the line has.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the reindented lines produced by run() to f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next normalized line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token and record statement/comment start lines.

        type, token, start, end, line -- the five fields of a tokenize
            token; start is a (row, column) pair and only its row is used.
        The remaining keyword parameters just bind the tokenize constants
        as defaults.

        Appends (row, level) to self.stats for the first token of each
        statement, and (row, -1) for each comment seen while a statement
        start is still being looked for.
        """
        # A plain parameter indexed here replaces the old tuple parameter
        # "(sline, scol)", which Python 3 no longer accepts.
        sline = start[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
				284
				285	# Count number of leading blanks.
				286	def getlspace(line):
				287	i, n = 0, len(line)
				288	while i < n and line[i] == " ":
				289	i += 1
				290	return i
				291
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()