blob: 1b8a8113a9c4625989709fd0e85be8e468f0ea87 [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001#!/usr/bin/env python2.5
2# Copyright 2006 Google, Inc. All Rights Reserved.
3# Licensed to PSF under a Contributor Agreement.
4
5"""Refactoring framework.
6
7Used as a main program, this can refactor any number of files and/or
8recursively descend down directories. Imported as a module, this
9provides infrastructure to write your own refactoring tool.
10"""
11
12__author__ = "Guido van Rossum <guido@python.org>"
13
14
15# Python imports
16import os
17import sys
18import difflib
19import optparse
20import logging
Christian Heimes81ee3ef2008-05-04 22:42:01 +000021from collections import defaultdict
22from itertools import chain
Martin v. Löwisef04c442008-03-19 05:04:44 +000023
24# Local imports
25from .pgen2 import driver
26from .pgen2 import tokenize
27
28from . import pytree
29from . import patcomp
30from . import fixes
31from . import pygram
32
def main(fixer_dir, args=None):
    """Main program.

    Parses the command line, optionally lists the available fixers, and
    then runs a RefactoringTool over every file/directory argument.

    Args:
        fixer_dir: directory where fixer modules are located.
        args: optional; a list of command line arguments. If omitted,
              sys.argv[1:] is used.

    Returns a suggested exit status:
        0 -- success (or --list-fixes given with no file arguments),
        1 -- the refactoring tool recorded at least one error,
        2 -- no file or directory argument was given.
    """
    # Set up option parser
    parser = optparse.OptionParser(usage="refactor.py [options] file|dir ...")
    parser.add_option("-d", "--doctests_only", action="store_true",
                      help="Fix up doctests only")
    parser.add_option("-f", "--fix", action="append", default=[],
                      help="Each FIX specifies a transformation; default all")
    parser.add_option("-l", "--list-fixes", action="store_true",
                      help="List available transformations (fixes/fix_*.py)")
    parser.add_option("-p", "--print-function", action="store_true",
                      help="Modify the grammar so that print() is a function")
    parser.add_option("-v", "--verbose", action="store_true",
                      help="More verbose logging")
    parser.add_option("-w", "--write", action="store_true",
                      help="Write back modified files")

    # Parse command line arguments
    options, args = parser.parse_args(args)
    if options.list_fixes:
        print("Available transformations for the -f/--fix option:")
        for fixname in get_all_fix_names(fixer_dir):
            print(fixname)
        # Listing alone is a complete, successful run.
        if not args:
            return 0
    if not args:
        print("At least one file or directory argument required.",
              file=sys.stderr)
        print("Use --help to show usage.", file=sys.stderr)
        return 2

    # Set up logging handler.  (The former fallback for interpreters older
    # than 2.4 was unreachable: this module already requires print() with
    # file= and "except ... as", so basicConfig() is always available.)
    logging.basicConfig(format='%(name)s: %(message)s', level=logging.INFO)

    # Initialize the refactoring tool
    rt = RefactoringTool(fixer_dir, options)

    # Refactor all files and directories passed as arguments; skipped when
    # loading the fixers already produced errors.
    if not rt.errors:
        rt.refactor_args(args)
        rt.summarize()

    # Return error status (0 if rt.errors is empty)
    return int(bool(rt.errors))
90
91
def get_all_fix_names(fixer_dir):
    """Return a sorted list of all available fix names.

    A fix name is the part of a ``fix_<name>.py`` file name between the
    ``fix_`` prefix and the ``.py`` suffix; every other entry found in
    fixer_dir is ignored.

    Args:
        fixer_dir: directory to scan (passed to os.listdir()).
    """
    # One sort of the final names is enough; sorting the raw directory
    # listing first does not affect the result.
    return sorted(name[4:-3]
                  for name in os.listdir(fixer_dir)
                  if name.startswith("fix_") and name.endswith(".py"))
102
def get_head_types(pat):
    """Return the set of node types with which a match of pat can start.

    Args:
        pat: a pytree pattern (NodePattern, LeafPattern, NegatedPattern
             or WildcardPattern).

    Returns:
        A set of pattern types; it contains None for patterns that carry
        no type of their own.

    Raises:
        Exception: if pat is not one of the pattern classes listed above.
    """
    if isinstance(pat, (pytree.NodePattern, pytree.LeafPattern)):
        # NodePatterns must either have no type and no content
        # or a type and content -- so they don't get any farther.
        # Leaf patterns are always returned as they are.
        return {pat.type}

    if isinstance(pat, pytree.NegatedPattern):
        if not pat.content:
            return {None}  # Negated patterns don't have a type
        return get_head_types(pat.content)

    if isinstance(pat, pytree.WildcardPattern):
        # Collect the head types of every pattern in every alternative.
        heads = set()
        for alternative in pat.content:
            for sub_pattern in alternative:
                heads |= get_head_types(sub_pattern)
        return heads

    raise Exception("Oh no! I don't understand pattern %s" %(pat))
127
def get_headnode_dict(fixer_list):
    """Group fixers by the node types their patterns can start with.

    Args:
        fixer_list: a list of fixers, each with a ``pattern`` attribute.

    Returns:
        A defaultdict(list) mapping head node type --> list of fixers.
        Fixers whose pattern is false (e.g. None) are filed under None.
    """
    by_head = defaultdict(list)
    for fixer in fixer_list:
        if fixer.pattern:
            head_types = get_head_types(fixer.pattern)
        else:
            # No pattern to inspect: register under the None key.
            head_types = [None]
        for head in head_types:
            by_head[head].append(fixer)
    return by_head
139
Martin v. Löwisef04c442008-03-19 05:04:44 +0000140
class RefactoringTool(object):
    """Loads fixers and applies them to files, directories, stdin or strings.

    Attributes set by __init__:
        fixer_dir:  directory in which fixer modules are looked up.
        options:    the optparse.Values instance passed in.
        errors:     list of (msg, args, kwds) tuples recorded by log_error().
        fixer_log:  list handed to every fixer for its own messages.
        pre_order, post_order: mappings of head node type --> list of
                    fixers, as built by get_headnode_dict().
        files:      names of the files handed to write_file().
    """

    def __init__(self, fixer_dir, options):
        """Initializer.

        Args:
            fixer_dir: directory in which to find fixer modules.
            options: an optparse.Values instance.
        """
        self.fixer_dir = fixer_dir
        self.options = options
        self.errors = []
        self.logger = logging.getLogger("RefactoringTool")
        self.fixer_log = []
        if self.options.print_function:
            # The grammar object is module-level state shared by every
            # tool instance; pop() keeps a second instance from failing
            # with KeyError once "print" has already been removed.
            pygram.python_grammar.keywords.pop("print", None)
        self.driver = driver.Driver(pygram.python_grammar,
                                    convert=pytree.convert,
                                    logger=self.logger)
        pre_order, post_order = self.get_fixers()

        self.pre_order = get_headnode_dict(pre_order)
        self.post_order = get_headnode_dict(post_order)

        self.files = []  # List of files that were or should be modified

    def get_fixers(self):
        """Inspects the options to load the requested patterns and handlers.

        Problems with individual fixers (module not importable, class
        missing, constructor raising) are recorded through log_error() and
        that fixer is skipped.

        Returns:
          (pre_order, post_order), where pre_order is the list of fixers that
          want a pre-order AST traversal, and post_order is the list that want
          post-order traversal.  Both lists are sorted by run_order.

        Raises:
          ValueError: if a fixer's order is neither "pre" nor "post".
        """
        # The fixer directory doubles as a dotted package name.
        fixer_pkg = ".".join(self.fixer_dir.split(os.path.sep))
        pre_order_fixers = []
        post_order_fixers = []
        fix_names = self.options.fix
        if not fix_names or "all" in fix_names:
            fix_names = get_all_fix_names(self.fixer_dir)
        for fix_name in fix_names:
            try:
                mod = __import__(fixer_pkg + ".fix_" + fix_name, {}, {}, ["*"])
            except ImportError:
                self.log_error("Can't find transformation %s", fix_name)
                continue
            # fix_foo_bar.py is expected to define class FixFooBar.
            parts = fix_name.split("_")
            class_name = "Fix" + "".join(p.title() for p in parts)
            try:
                fix_class = getattr(mod, class_name)
            except AttributeError:
                self.log_error("Can't find fixes.fix_%s.%s",
                               fix_name, class_name)
                continue
            try:
                fixer = fix_class(self.options, self.fixer_log)
            except Exception:
                self.log_error("Can't instantiate fixes.fix_%s.%s()",
                               fix_name, class_name, exc_info=True)
                continue
            # Explicit fixers only run when named on the command line.
            if fixer.explicit and fix_name not in self.options.fix:
                self.log_message("Skipping implicit fixer: %s", fix_name)
                continue

            if self.options.verbose:
                self.log_message("Adding transformation: %s", fix_name)
            if fixer.order == "pre":
                pre_order_fixers.append(fixer)
            elif fixer.order == "post":
                post_order_fixers.append(fixer)
            else:
                raise ValueError("Illegal fixer order: %r" % fixer.order)

        pre_order_fixers.sort(key=lambda x: x.run_order)
        post_order_fixers.sort(key=lambda x: x.run_order)
        return (pre_order_fixers, post_order_fixers)

    def log_error(self, msg, *args, **kwds):
        """Records an error and logs it at ERROR level."""
        self.errors.append((msg, args, kwds))
        self.logger.error(msg, *args, **kwds)

    def log_message(self, msg, *args):
        """Hook to log a message (at INFO level)."""
        if args:
            msg = msg % args
        self.logger.info(msg)

    def refactor_args(self, args):
        """Refactors files and directories from an argument list.

        The argument "-" stands for standard input.
        """
        for arg in args:
            if arg == "-":
                self.refactor_stdin()
            elif os.path.isdir(arg):
                self.refactor_dir(arg)
            else:
                self.refactor_file(arg)

    def refactor_dir(self, arg):
        """Descends down a directory and refactor every Python file found.

        Python files are assumed to have a .py extension.

        Files and subdirectories starting with '.' are skipped.
        """
        for dirpath, dirnames, filenames in os.walk(arg):
            if self.options.verbose:
                self.log_message("Descending into %s", dirpath)
            dirnames.sort()
            filenames.sort()
            for name in filenames:
                # Require the dot as well: a bare "py" suffix would also
                # match unrelated names such as "happy".
                if not name.startswith(".") and name.endswith(".py"):
                    self.refactor_file(os.path.join(dirpath, name))
            # Modify dirnames in-place to remove subdirs with leading dots
            dirnames[:] = [dn for dn in dirnames if not dn.startswith(".")]

    def refactor_file(self, filename):
        """Refactors a file.

        Depending on options.doctests_only either only the doctests found
        in the file or the whole file is refactored; changes are handed to
        write_file().
        """
        try:
            f = open(filename)
        except IOError as err:
            self.log_error("Can't open %s: %s", filename, err)
            return
        with f:
            source = f.read() + "\n"  # Silence certain parse errors
        if self.options.doctests_only:
            if self.options.verbose:
                self.log_message("Refactoring doctests in %s", filename)
            output = self.refactor_docstring(source, filename)
            if output != source:
                # The [:-1] takes off the \n we added earlier, so the
                # rewritten file does not gain a trailing newline.
                self.write_file(output[:-1], filename, source[:-1])
            elif self.options.verbose:
                self.log_message("No doctest changes in %s", filename)
        else:
            tree = self.refactor_string(source, filename)
            if tree and tree.was_changed:
                # The [:-1] is to take off the \n we added earlier
                self.write_file(str(tree)[:-1], filename)
            elif self.options.verbose:
                self.log_message("No changes in %s", filename)

    def refactor_string(self, data, name):
        """Refactor a given input string.

        Args:
            data: a string holding the code to be refactored.
            name: a human-readable name for use in error/log messages.

        Returns:
            An AST corresponding to the refactored input stream; None if
            there were errors during the parse.
        """
        try:
            tree = self.driver.parse_string(data, 1)
        except Exception as err:
            self.log_error("Can't parse %s: %s: %s",
                           name, err.__class__.__name__, err)
            return None
        if self.options.verbose:
            self.log_message("Refactoring %s", name)
        self.refactor_tree(tree, name)
        return tree

    def refactor_stdin(self):
        """Refactors the text read from standard input.

        Nothing can be written back to stdin, so combining this with the
        write option is recorded as an error.
        """
        if self.options.write:
            self.log_error("Can't write changes back to stdin")
            return
        source = sys.stdin.read()
        if self.options.doctests_only:
            if self.options.verbose:
                self.log_message("Refactoring doctests in stdin")
            output = self.refactor_docstring(source, "<stdin>")
            if output != source:
                self.write_file(output, "<stdin>", source)
            elif self.options.verbose:
                self.log_message("No doctest changes in stdin")
        else:
            tree = self.refactor_string(source, "<stdin>")
            if tree and tree.was_changed:
                self.write_file(str(tree), "<stdin>", source)
            elif self.options.verbose:
                self.log_message("No changes in stdin")

    def refactor_tree(self, tree, name):
        """Refactors a parse tree (modifying the tree in place).

        Every fixer gets start_tree() before and finish_tree() after the
        pre-order and post-order traversals.

        Args:
            tree: a pytree.Node instance representing the root of the tree
                  to be refactored.
            name: a human-readable name for this tree.

        Returns:
            True if the tree was modified, False otherwise.
        """
        # pre_order/post_order map head types to lists of fixers, so the
        # values have to be flattened.  The result must be a real list: it
        # is walked twice, and a bare chain() iterator would already be
        # exhausted when finish_tree() is due.  A fixer registered under
        # several head types is kept only once.
        all_fixers = []
        seen = set()
        for fixer in chain(chain.from_iterable(self.pre_order.values()),
                           chain.from_iterable(self.post_order.values())):
            if id(fixer) not in seen:
                seen.add(id(fixer))
                all_fixers.append(fixer)

        for fixer in all_fixers:
            fixer.start_tree(tree, name)

        self.traverse_by(self.pre_order, tree.pre_order())
        self.traverse_by(self.post_order, tree.post_order())

        for fixer in all_fixers:
            fixer.finish_tree(tree, name)
        return tree.was_changed

    def traverse_by(self, fixers, traversal):
        """Traverse an AST, applying a set of fixers to each node.

        This is a helper method for refactor_tree().

        Args:
            fixers: a mapping of head node type --> list of fixer
                    instances (see get_headnode_dict()); the list under
                    None is tried for every node.
            traversal: a generator that yields AST nodes.

        Returns:
            None
        """
        if not fixers:
            return
        for node in traversal:
            for fixer in fixers[node.type] + fixers[None]:
                results = fixer.match(node)
                if results:
                    new = fixer.transform(node, results)
                    if new is not None and (new != node or
                                            str(new) != str(node)):
                        node.replace(new)
                        # Later fixers see the replacement node.
                        node = new

    def write_file(self, new_text, filename, old_text=None):
        """Writes a string to a file.

        The file name is always recorded in self.files.  If the text is
        unchanged nothing else happens.

        Otherwise, it first shows a unified diff between the old text
        and the new text, and then rewrites the file; the latter is
        only done if the write option is set.  The previous content is
        kept as filename + ".bak".

        Args:
            new_text: the refactored text.
            filename: name of the file to rewrite.
            old_text: the original text; read from filename when None.
        """
        self.files.append(filename)
        if old_text is None:
            try:
                f = open(filename, "r")
            except IOError as err:
                self.log_error("Can't read %s: %s", filename, err)
                return
            with f:
                old_text = f.read()
        if old_text == new_text:
            if self.options.verbose:
                self.log_message("No changes to %s", filename)
            return
        diff_texts(old_text, new_text, filename)
        if not self.options.write:
            if self.options.verbose:
                self.log_message("Not writing changes to %s", filename)
            return
        backup = filename + ".bak"
        if os.path.lexists(backup):
            try:
                os.remove(backup)
            except os.error:
                self.log_message("Can't remove backup %s", backup)
        # Backup problems are only reported; the file is still rewritten.
        try:
            os.rename(filename, backup)
        except os.error:
            self.log_message("Can't rename %s to %s", filename, backup)
        try:
            f = open(filename, "w")
        except (IOError, os.error) as err:
            self.log_error("Can't create %s: %s", filename, err)
            return
        with f:
            try:
                f.write(new_text)
            except (IOError, os.error) as err:
                self.log_error("Can't write %s: %s", filename, err)
                # Don't claim success below after a failed write.
                return
        if self.options.verbose:
            self.log_message("Wrote changes to %s", filename)

    PS1 = ">>> "
    PS2 = "... "

    def refactor_docstring(self, input, filename):
        """Refactors a docstring, looking for doctests.

        This returns a modified version of the input string.  It looks
        for doctests, which start with a ">>>" prompt, and may be
        continued with "..." prompts, as long as the "..." is indented
        the same as the ">>>".

        (Unfortunately we can't use the doctest module's parser,
        since, like most parsers, it is not geared towards preserving
        the original source.)
        """
        result = []
        block = None         # lines of the doctest being collected
        block_lineno = None  # 1-based line number of the block's first line
        indent = None        # text in front of the block's ">>> " prompt
        for lineno, line in enumerate(input.splitlines(True), 1):
            if line.lstrip().startswith(self.PS1):
                # A new prompt ends any block that is still open.
                if block is not None:
                    result.extend(self.refactor_doctest(block, block_lineno,
                                                        indent, filename))
                block_lineno = lineno
                block = [line]
                indent = line[:line.find(self.PS1)]
            elif (indent is not None and
                  (line.startswith(indent + self.PS2) or
                   line == indent + self.PS2.rstrip() + "\n")):
                # Continuation line (a bare "..." counts as well).
                block.append(line)
            else:
                if block is not None:
                    result.extend(self.refactor_doctest(block, block_lineno,
                                                        indent, filename))
                block = None
                indent = None
                result.append(line)
        if block is not None:
            result.extend(self.refactor_doctest(block, block_lineno,
                                                indent, filename))
        return "".join(result)

    def refactor_doctest(self, block, lineno, indent, filename):
        """Refactors one doctest.

        A doctest is given as a block of lines, the first of which starts
        with ">>>" (possibly indented), while the remaining lines start
        with "..." (identically indented).

        Returns the list of lines to use in place of block; the block
        itself is returned when it cannot be parsed (which is recorded as
        an error) or when no fixer changed it.
        """
        try:
            tree = self.parse_block(block, lineno, indent)
        except Exception as err:
            if self.options.verbose:
                for line in block:
                    self.log_message("Source: %s", line.rstrip("\n"))
            self.log_error("Can't parse docstring in %s line %s: %s: %s",
                           filename, lineno, err.__class__.__name__, err)
            return block
        if self.refactor_tree(tree, filename):
            new = str(tree).splitlines(True)
            # Undo the adjustment of the line numbers in wrap_toks() below.
            clipped, new = new[:lineno-1], new[lineno-1:]
            assert clipped == ["\n"] * (lineno-1), clipped
            if not new[-1].endswith("\n"):
                new[-1] += "\n"
            # Put the prompts back in front of the refactored lines.
            block = [indent + self.PS1 + new.pop(0)]
            if new:
                block += [indent + self.PS2 + line for line in new]
        return block

    def summarize(self):
        """Logs which files were (or need to be) modified, followed by the
        fixer messages and the recorded errors, if any."""
        if self.options.write:
            were = "were"
        else:
            were = "need to be"
        if not self.files:
            self.log_message("No files %s modified.", were)
        else:
            self.log_message("Files that %s modified:", were)
            for filename in self.files:
                self.log_message(filename)
        if self.fixer_log:
            self.log_message("Warnings/messages while refactoring:")
            for message in self.fixer_log:
                self.log_message(message)
        if self.errors:
            if len(self.errors) == 1:
                self.log_message("There was 1 error:")
            else:
                self.log_message("There were %d errors:", len(self.errors))
            for msg, args, kwds in self.errors:
                # kwds holds logging options such as exc_info, which
                # log_message() does not accept; only the text is repeated.
                self.log_message(msg, *args)

    def parse_block(self, block, lineno, indent):
        """Parses a block into a tree.

        This is necessary to get correct line number / offset information
        in the parser diagnostics and embedded into the parse tree.
        """
        return self.driver.parse_tokens(self.wrap_toks(block, lineno, indent))

    def wrap_toks(self, block, lineno, indent):
        """Wraps a tokenize stream to systematically modify start/end."""
        readline = self.gen_lines(block, indent).__next__
        tokens = tokenize.generate_tokens(readline)
        for tok_type, value, (line0, col0), (line1, col1), line_text in tokens:
            line0 += lineno - 1
            line1 += lineno - 1
            # Don't bother updating the columns; this is too complicated
            # since line_text would also have to be updated and it would
            # still break for tokens spanning lines.  Let the user guess
            # that the column numbers for doctests are relative to the
            # end of the prompt string (PS1 or PS2).
            yield tok_type, value, (line0, col0), (line1, col1), line_text

    def gen_lines(self, block, indent):
        """Generates lines as expected by tokenize from a list of lines.

        This strips the first len(indent + self.PS1) characters off each line.
        After the block is used up, empty strings are generated forever.

        Raises:
            AssertionError: if a line does not start with the expected
                            prompt.
        """
        prefix1 = indent + self.PS1
        prefix2 = indent + self.PS2
        prefix = prefix1
        for line in block:
            if line.startswith(prefix):
                yield line[len(prefix):]
            elif line == prefix.rstrip() + "\n":
                # A bare prompt stands for an empty line.
                yield "\n"
            else:
                raise AssertionError("line=%r, prefix=%r" % (line, prefix))
            # Every line after the first carries the continuation prompt.
            prefix = prefix2
        while True:
            yield ""
569
570
def diff_texts(a, b, filename):
    """Prints a unified diff of two strings.

    Args:
        a: the original text.
        b: the refactored text.
        filename: name used for both sides in the diff header.
    """
    old_lines = a.splitlines()
    new_lines = b.splitlines()
    # lineterm="" because splitlines() dropped the line endings and
    # print() supplies its own.
    diff = difflib.unified_diff(old_lines, new_lines,
                                fromfile=filename, tofile=filename,
                                fromfiledate="(original)",
                                tofiledate="(refactored)",
                                lineterm="")
    for diff_line in diff:
        print(diff_line)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000579
580
if __name__ == "__main__":
    # main() requires the fixer directory; calling it without arguments
    # raises TypeError before any option is parsed.
    # NOTE(review): "lib2to3/fixes" is assumed to be the standard fixer
    # location relative to the working directory -- confirm against the
    # script that normally drives this module.
    sys.exit(main(os.path.join("lib2to3", "fixes")))