blob: eabe1a71a654acb76434d0ceab6b214069e26170 [file] [log] [blame]
Benjamin Petersonf37eb3a2010-10-14 23:00:04 +00001"""A bottom-up tree matching algorithm implementation meant to speed
2up 2to3's matching process. After the tree patterns are reduced to
3their rarest linear path, a linear Aho-Corasick automaton is
4created. The linear automaton traverses the linear paths from the
5leaves to the root of the AST and returns a set of nodes for further
6matching. This reduces significantly the number of candidate nodes."""
7
8__author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
9
10import logging
11import itertools
12from collections import defaultdict
13
14from . import pytree
15from .btm_utils import reduce_tree
16
17class BMNode(object):
18 """Class for a node of the Aho-Corasick automaton used in matching"""
19 count = itertools.count()
20 def __init__(self):
21 self.transition_table = {}
22 self.fixers = []
23 self.id = next(BMNode.count)
24 self.content = ''
25
26class BottomMatcher(object):
27 """The main matcher class. After instantiating the patterns should
28 be added using the add_fixer method"""
29
30 def __init__(self):
31 self.match = set()
32 self.root = BMNode()
33 self.nodes = [self.root]
34 self.fixers = []
35 self.logger = logging.getLogger("RefactoringTool")
36
37 def add_fixer(self, fixer):
38 """Reduces a fixer's pattern tree to a linear path and adds it
39 to the matcher(a common Aho-Corasick automaton). The fixer is
40 appended on the matching states and called when they are
41 reached"""
42 self.fixers.append(fixer)
43 tree = reduce_tree(fixer.pattern_tree)
44 linear = tree.get_linear_subpattern()
45 match_nodes = self.add(linear, start=self.root)
46 for match_node in match_nodes:
47 match_node.fixers.append(fixer)
48
49 def add(self, pattern, start):
50 "Recursively adds a linear pattern to the AC automaton"
51 #print("adding pattern", pattern, "to", start)
52 if not pattern:
53 #print("empty pattern")
54 return [start]
55 if isinstance(pattern[0], tuple):
56 #alternatives
57 #print("alternatives")
58 match_nodes = []
59 for alternative in pattern[0]:
60 #add all alternatives, and add the rest of the pattern
61 #to each end node
62 end_nodes = self.add(alternative, start=start)
63 for end in end_nodes:
64 match_nodes.extend(self.add(pattern[1:], end))
65 return match_nodes
66 else:
67 #single token
68 #not last
69 if pattern[0] not in start.transition_table:
70 #transition did not exist, create new
71 next_node = BMNode()
72 start.transition_table[pattern[0]] = next_node
73 else:
74 #transition exists already, follow
75 next_node = start.transition_table[pattern[0]]
76
77 if pattern[1:]:
78 end_nodes = self.add(pattern[1:], start=next_node)
79 else:
80 end_nodes = [next_node]
81 return end_nodes
82
83 def run(self, leaves):
84 """The main interface with the bottom matcher. The tree is
85 traversed from the bottom using the constructed
86 automaton. Nodes are only checked once as the tree is
87 retraversed. When the automaton fails, we give it one more
88 shot(in case the above tree matches as a whole with the
89 rejected leaf), then we break for the next leaf. There is the
90 special case of multiple arguments(see code comments) where we
91 recheck the nodes
92
93 Args:
94 The leaves of the AST tree to be matched
95
96 Returns:
97 A dictionary of node matches with fixers as the keys
98 """
99 current_ac_node = self.root
100 results = defaultdict(list)
101 for leaf in leaves:
102 current_ast_node = leaf
103 while current_ast_node:
104 current_ast_node.was_checked = True
105 for child in current_ast_node.children:
106 # multiple statements, recheck
107 if isinstance(child, pytree.Leaf) and child.value == ";":
108 current_ast_node.was_checked = False
109 break
110 if current_ast_node.type == 1:
111 #name
112 node_token = current_ast_node.value
113 else:
114 node_token = current_ast_node.type
115
116 if node_token in current_ac_node.transition_table:
117 #token matches
118 current_ac_node = current_ac_node.transition_table[node_token]
119 for fixer in current_ac_node.fixers:
120 if not fixer in results:
121 results[fixer] = []
122 results[fixer].append(current_ast_node)
123
124 else:
125 #matching failed, reset automaton
126 current_ac_node = self.root
127 if (current_ast_node.parent is not None
128 and current_ast_node.parent.was_checked):
129 #the rest of the tree upwards has been checked, next leaf
130 break
131
132 #recheck the rejected node once from the root
133 if node_token in current_ac_node.transition_table:
134 #token matches
135 current_ac_node = current_ac_node.transition_table[node_token]
136 for fixer in current_ac_node.fixers:
137 if not fixer in results.keys():
138 results[fixer] = []
139 results[fixer].append(current_ast_node)
140
141 current_ast_node = current_ast_node.parent
142 return results
143
144 def print_ac(self):
145 "Prints a graphviz diagram of the BM automaton(for debugging)"
146 print("digraph g{")
147 def print_node(node):
148 for subnode_key in node.transition_table.keys():
149 subnode = node.transition_table[subnode_key]
150 print("%d -> %d [label=%s] //%s" %
151 (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
152 if subnode_key == 1:
153 print(subnode.content)
154 print_node(subnode)
155 print_node(self.root)
156 print("}")
157
158# taken from pytree.py for debugging; only used by print_ac
159_type_reprs = {}
160def type_repr(type_num):
161 global _type_reprs
162 if not _type_reprs:
163 from .pygram import python_symbols
164 # printing tokens is possible but not as useful
165 # from .pgen2 import token // token.__dict__.items():
166 for name, val in python_symbols.__dict__.items():
167 if type(val) == int: _type_reprs[val] = name
168 return _type_reprs.setdefault(type_num, type_num)