blob: 339750edbaacd4c6e3cb6e369f87489c9d0cb442 [file] [log] [blame]
Benjamin Petersonf37eb3a2010-10-14 23:00:04 +00001"Utility functions used by the btm_matcher module"
2
3from . import pytree
4from .pgen2 import grammar, token
5from .pygram import pattern_symbols, python_symbols
6
7syms = pattern_symbols
8pysyms = python_symbols
9tokens = grammar.opmap
10token_labels = token
11
12TYPE_ANY = -1
13TYPE_ALTERNATIVES = -2
14TYPE_GROUP = -3
15
16class MinNode(object):
17 """This class serves as an intermediate representation of the
18 pattern tree during the conversion to sets of leaf-to-root
19 subpatterns"""
20
21 def __init__(self, type=None, name=None):
22 self.type = type
23 self.name = name
24 self.children = []
25 self.leaf = False
26 self.parent = None
27 self.alternatives = []
28 self.group = []
29
30 def __repr__(self):
31 return str(self.type) + ' ' + str(self.name)
32
33 def leaf_to_root(self):
34 """Internal method. Returns a characteristic path of the
35 pattern tree. This method must be run for all leaves until the
36 linear subpatterns are merged into a single"""
37 node = self
38 subp = []
39 while node:
40 if node.type == TYPE_ALTERNATIVES:
41 node.alternatives.append(subp)
42 if len(node.alternatives) == len(node.children):
43 #last alternative
44 subp = [tuple(node.alternatives)]
45 node.alternatives = []
46 node = node.parent
47 continue
48 else:
49 node = node.parent
50 subp = None
51 break
52
53 if node.type == TYPE_GROUP:
54 node.group.append(subp)
55 #probably should check the number of leaves
56 if len(node.group) == len(node.children):
57 subp = get_characteristic_subpattern(node.group)
58 node.group = []
59 node = node.parent
60 continue
61 else:
62 node = node.parent
63 subp = None
64 break
65
66 if node.type == token_labels.NAME and node.name:
67 #in case of type=name, use the name instead
68 subp.append(node.name)
69 else:
70 subp.append(node.type)
71
72 node = node.parent
73 return subp
74
75 def get_linear_subpattern(self):
76 """Drives the leaf_to_root method. The reason that
77 leaf_to_root must be run multiple times is because we need to
78 reject 'group' matches; for example the alternative form
79 (a | b c) creates a group [b c] that needs to be matched. Since
80 matching multiple linear patterns overcomes the automaton's
81 capabilities, leaf_to_root merges each group into a single
82 choice based on 'characteristic'ity,
83
84 i.e. (a|b c) -> (a|b) if b more characteristic than c
85
86 Returns: The most 'characteristic'(as defined by
87 get_characteristic_subpattern) path for the compiled pattern
88 tree.
89 """
90
91 for l in self.leaves():
92 subp = l.leaf_to_root()
93 if subp:
94 return subp
95
96 def leaves(self):
97 "Generator that returns the leaves of the tree"
98 for child in self.children:
Andrew Svetlov7d140152012-10-06 17:11:45 +030099 yield from child.leaves()
Benjamin Petersonf37eb3a2010-10-14 23:00:04 +0000100 if not self.children:
101 yield self
102
103def reduce_tree(node, parent=None):
104 """
105 Internal function. Reduces a compiled pattern tree to an
106 intermediate representation suitable for feeding the
107 automaton. This also trims off any optional pattern elements(like
108 [a], a*).
109 """
110
111 new_node = None
112 #switch on the node type
113 if node.type == syms.Matcher:
114 #skip
115 node = node.children[0]
116
117 if node.type == syms.Alternatives :
118 #2 cases
119 if len(node.children) <= 2:
120 #just a single 'Alternative', skip this node
121 new_node = reduce_tree(node.children[0], parent)
122 else:
123 #real alternatives
124 new_node = MinNode(type=TYPE_ALTERNATIVES)
125 #skip odd children('|' tokens)
126 for child in node.children:
127 if node.children.index(child)%2:
128 continue
129 reduced = reduce_tree(child, new_node)
130 if reduced is not None:
131 new_node.children.append(reduced)
132 elif node.type == syms.Alternative:
133 if len(node.children) > 1:
134
135 new_node = MinNode(type=TYPE_GROUP)
136 for child in node.children:
137 reduced = reduce_tree(child, new_node)
138 if reduced:
139 new_node.children.append(reduced)
140 if not new_node.children:
141 # delete the group if all of the children were reduced to None
142 new_node = None
143
144 else:
145 new_node = reduce_tree(node.children[0], parent)
146
147 elif node.type == syms.Unit:
148 if (isinstance(node.children[0], pytree.Leaf) and
149 node.children[0].value == '('):
150 #skip parentheses
151 return reduce_tree(node.children[1], parent)
152 if ((isinstance(node.children[0], pytree.Leaf) and
153 node.children[0].value == '[')
154 or
155 (len(node.children)>1 and
156 hasattr(node.children[1], "value") and
157 node.children[1].value == '[')):
158 #skip whole unit if its optional
159 return None
160
161 leaf = True
162 details_node = None
163 alternatives_node = None
164 has_repeater = False
165 repeater_node = None
166 has_variable_name = False
167
168 for child in node.children:
169 if child.type == syms.Details:
170 leaf = False
171 details_node = child
172 elif child.type == syms.Repeater:
173 has_repeater = True
174 repeater_node = child
175 elif child.type == syms.Alternatives:
176 alternatives_node = child
177 if hasattr(child, 'value') and child.value == '=': # variable name
178 has_variable_name = True
179
180 #skip variable name
181 if has_variable_name:
182 #skip variable name, '='
183 name_leaf = node.children[2]
184 if hasattr(name_leaf, 'value') and name_leaf.value == '(':
185 # skip parenthesis
186 name_leaf = node.children[3]
187 else:
188 name_leaf = node.children[0]
189
190 #set node type
191 if name_leaf.type == token_labels.NAME:
192 #(python) non-name or wildcard
193 if name_leaf.value == 'any':
194 new_node = MinNode(type=TYPE_ANY)
195 else:
196 if hasattr(token_labels, name_leaf.value):
197 new_node = MinNode(type=getattr(token_labels, name_leaf.value))
198 else:
199 new_node = MinNode(type=getattr(pysyms, name_leaf.value))
200
201 elif name_leaf.type == token_labels.STRING:
202 #(python) name or character; remove the apostrophes from
203 #the string value
204 name = name_leaf.value.strip("'")
205 if name in tokens:
206 new_node = MinNode(type=tokens[name])
207 else:
208 new_node = MinNode(type=token_labels.NAME, name=name)
209 elif name_leaf.type == syms.Alternatives:
210 new_node = reduce_tree(alternatives_node, parent)
211
212 #handle repeaters
213 if has_repeater:
214 if repeater_node.children[0].value == '*':
215 #reduce to None
216 new_node = None
217 elif repeater_node.children[0].value == '+':
218 #reduce to a single occurence i.e. do nothing
219 pass
220 else:
221 #TODO: handle {min, max} repeaters
222 raise NotImplementedError
223 pass
224
225 #add children
226 if details_node and new_node is not None:
227 for child in details_node.children[1:-1]:
228 #skip '<', '>' markers
229 reduced = reduce_tree(child, new_node)
230 if reduced is not None:
231 new_node.children.append(reduced)
232 if new_node:
233 new_node.parent = parent
234 return new_node
235
236
237def get_characteristic_subpattern(subpatterns):
238 """Picks the most characteristic from a list of linear patterns
239 Current order used is:
240 names > common_names > common_chars
241 """
242 if not isinstance(subpatterns, list):
243 return subpatterns
244 if len(subpatterns)==1:
245 return subpatterns[0]
246
247 # first pick out the ones containing variable names
248 subpatterns_with_names = []
249 subpatterns_with_common_names = []
250 common_names = ['in', 'for', 'if' , 'not', 'None']
251 subpatterns_with_common_chars = []
252 common_chars = "[]().,:"
253 for subpattern in subpatterns:
254 if any(rec_test(subpattern, lambda x: type(x) is str)):
255 if any(rec_test(subpattern,
256 lambda x: isinstance(x, str) and x in common_chars)):
257 subpatterns_with_common_chars.append(subpattern)
258 elif any(rec_test(subpattern,
259 lambda x: isinstance(x, str) and x in common_names)):
260 subpatterns_with_common_names.append(subpattern)
261
262 else:
263 subpatterns_with_names.append(subpattern)
264
265 if subpatterns_with_names:
266 subpatterns = subpatterns_with_names
267 elif subpatterns_with_common_names:
268 subpatterns = subpatterns_with_common_names
269 elif subpatterns_with_common_chars:
270 subpatterns = subpatterns_with_common_chars
271 # of the remaining subpatterns pick out the longest one
272 return max(subpatterns, key=len)
273
274def rec_test(sequence, test_func):
275 """Tests test_func on all items of sequence and items of included
276 sub-iterables"""
277 for x in sequence:
278 if isinstance(x, (list, tuple)):
Andrew Svetlov7d140152012-10-06 17:11:45 +0300279 yield from rec_test(x, test_func)
Benjamin Petersonf37eb3a2010-10-14 23:00:04 +0000280 else:
281 yield test_func(x)