blob: ad3592c05f7ed320945b3c372be39363aa506915 [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
Benjamin Peterson608d8bc2009-05-05 23:23:31 +00004"""
5Python parse tree definitions.
Martin v. Löwisef04c442008-03-19 05:04:44 +00006
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
Benjamin Peterson5cff9312008-11-28 23:01:28 +000015import sys
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +000016import warnings
Benjamin Peterson5cff9312008-11-28 23:01:28 +000017from io import StringIO
18
Martin v. Löwisef04c442008-03-19 05:04:44 +000019HUGE = 0x7FFFFFFF # maximum repeat count, default max
20
Martin v. Löwis3faa84f2008-03-22 00:07:09 +000021_type_reprs = {}
22def type_repr(type_num):
23 global _type_reprs
24 if not _type_reprs:
25 from .pygram import python_symbols
26 # printing tokens is possible but not as useful
27 # from .pgen2 import token // token.__dict__.items():
28 for name, val in python_symbols.__dict__.items():
29 if type(val) == int: _type_reprs[val] = name
30 return _type_reprs.setdefault(type_num, type_num)
31
Martin v. Löwisef04c442008-03-19 05:04:44 +000032class Base(object):
33
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000034 """
35 Abstract base class for Node and Leaf.
Martin v. Löwisef04c442008-03-19 05:04:44 +000036
37 This provides some default functionality and boilerplate using the
38 template pattern.
39
40 A node may be a subnode of at most one parent.
41 """
42
43 # Default values for instance variables
44 type = None # int: token number (< 256) or symbol number (>= 256)
45 parent = None # Parent node pointer, or None
46 children = () # Tuple of subnodes
47 was_changed = False
Benjamin Petersonb0871ca2010-10-14 23:03:32 +000048 was_checked = False
Martin v. Löwisef04c442008-03-19 05:04:44 +000049
50 def __new__(cls, *args, **kwds):
51 """Constructor that prevents Base from being instantiated."""
52 assert cls is not Base, "Cannot instantiate Base"
53 return object.__new__(cls)
54
55 def __eq__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000056 """
57 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000058
59 This calls the method _eq().
60 """
61 if self.__class__ is not other.__class__:
62 return NotImplemented
63 return self._eq(other)
64
Benjamin Petersone80b51f2009-11-02 18:33:36 +000065 __hash__ = None # For Py3 compatibility.
66
Martin v. Löwisef04c442008-03-19 05:04:44 +000067 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000068 """
69 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000070
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000071 This is called by __eq__ and __ne__. It is only called if the two nodes
72 have the same type. This must be implemented by the concrete subclass.
73 Nodes should be considered equal if they have the same structure,
74 ignoring the prefix string and other context information.
Martin v. Löwisef04c442008-03-19 05:04:44 +000075 """
76 raise NotImplementedError
77
78 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000079 """
80 Return a cloned (deep) copy of self.
Martin v. Löwisef04c442008-03-19 05:04:44 +000081
82 This must be implemented by the concrete subclass.
83 """
84 raise NotImplementedError
85
86 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000087 """
88 Return a post-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +000089
90 This must be implemented by the concrete subclass.
91 """
92 raise NotImplementedError
93
94 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000095 """
96 Return a pre-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +000097
98 This must be implemented by the concrete subclass.
99 """
100 raise NotImplementedError
101
Martin v. Löwisef04c442008-03-19 05:04:44 +0000102 def replace(self, new):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000103 """Replace this node with a new one in the parent."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000104 assert self.parent is not None, str(self)
105 assert new is not None
106 if not isinstance(new, list):
107 new = [new]
108 l_children = []
109 found = False
110 for ch in self.parent.children:
111 if ch is self:
112 assert not found, (self.parent.children, self, new)
113 if new is not None:
114 l_children.extend(new)
115 found = True
116 else:
117 l_children.append(ch)
118 assert found, (self.children, self, new)
119 self.parent.changed()
120 self.parent.children = l_children
121 for x in new:
122 x.parent = self.parent
123 self.parent = None
124
125 def get_lineno(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000126 """Return the line number which generated the invocant node."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000127 node = self
128 while not isinstance(node, Leaf):
129 if not node.children:
130 return
131 node = node.children[0]
132 return node.lineno
133
134 def changed(self):
135 if self.parent:
136 self.parent.changed()
137 self.was_changed = True
138
139 def remove(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000140 """
141 Remove the node from the tree. Returns the position of the node in its
142 parent's children before it was removed.
143 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000144 if self.parent:
145 for i, node in enumerate(self.parent.children):
146 if node is self:
147 self.parent.changed()
148 del self.parent.children[i]
149 self.parent = None
150 return i
151
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000152 @property
153 def next_sibling(self):
154 """
155 The node immediately following the invocant in their parent's children
156 list. If the invocant does not have a next sibling, it is None
157 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000158 if self.parent is None:
159 return None
160
161 # Can't use index(); we need to test by identity
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000162 for i, child in enumerate(self.parent.children):
163 if child is self:
Martin v. Löwisef04c442008-03-19 05:04:44 +0000164 try:
165 return self.parent.children[i+1]
166 except IndexError:
167 return None
168
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000169 @property
170 def prev_sibling(self):
171 """
172 The node immediately preceding the invocant in their parent's children
173 list. If the invocant does not have a previous sibling, it is None.
174 """
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000175 if self.parent is None:
176 return None
177
178 # Can't use index(); we need to test by identity
179 for i, child in enumerate(self.parent.children):
180 if child is self:
181 if i == 0:
182 return None
183 return self.parent.children[i-1]
184
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000185 def leaves(self):
186 for child in self.children:
Andrew Svetlov7d140152012-10-06 17:11:45 +0300187 yield from child.leaves()
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000188
189 def depth(self):
190 if self.parent is None:
191 return 0
192 return 1 + self.parent.depth()
193
Martin v. Löwisef04c442008-03-19 05:04:44 +0000194 def get_suffix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000195 """
196 Return the string immediately following the invocant node. This is
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000197 effectively equivalent to node.next_sibling.prefix
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000198 """
199 next_sib = self.next_sibling
Martin v. Löwisef04c442008-03-19 05:04:44 +0000200 if next_sib is None:
201 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000202 return next_sib.prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000203
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000204 if sys.version_info < (3, 0):
205 def __str__(self):
206 return str(self).encode("ascii")
207
Martin v. Löwisef04c442008-03-19 05:04:44 +0000208class Node(Base):
209
210 """Concrete implementation for interior nodes."""
211
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000212 def __init__(self,type, children,
213 context=None,
214 prefix=None,
215 fixers_applied=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000216 """
217 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000218
219 Takes a type constant (a symbol number >= 256), a sequence of
220 child nodes, and an optional context keyword argument.
221
222 As a side effect, the parent pointers of the children are updated.
223 """
224 assert type >= 256, type
225 self.type = type
226 self.children = list(children)
227 for ch in self.children:
228 assert ch.parent is None, repr(ch)
229 ch.parent = self
230 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000231 self.prefix = prefix
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000232 if fixers_applied:
233 self.fixers_applied = fixers_applied[:]
234 else:
235 self.fixers_applied = None
Martin v. Löwisef04c442008-03-19 05:04:44 +0000236
237 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000238 """Return a canonical string representation."""
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000239 return "%s(%s, %r)" % (self.__class__.__name__,
240 type_repr(self.type),
Martin v. Löwisef04c442008-03-19 05:04:44 +0000241 self.children)
242
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000243 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000244 """
245 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000246
247 This reproduces the input source exactly.
248 """
249 return "".join(map(str, self.children))
250
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000251 if sys.version_info > (3, 0):
252 __str__ = __unicode__
253
Martin v. Löwisef04c442008-03-19 05:04:44 +0000254 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000255 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000256 return (self.type, self.children) == (other.type, other.children)
257
258 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000259 """Return a cloned (deep) copy of self."""
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000260 return Node(self.type, [ch.clone() for ch in self.children],
261 fixers_applied=self.fixers_applied)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000262
263 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000264 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000265 for child in self.children:
Andrew Svetlov7d140152012-10-06 17:11:45 +0300266 yield from child.post_order()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000267 yield self
268
269 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000270 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000271 yield self
272 for child in self.children:
Andrew Svetlov7d140152012-10-06 17:11:45 +0300273 yield from child.pre_order()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000274
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000275 def _prefix_getter(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000276 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000277 The whitespace and comments preceding this node in the input.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000278 """
279 if not self.children:
280 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000281 return self.children[0].prefix
282
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000283 def _prefix_setter(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000284 if self.children:
285 self.children[0].prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000286
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000287 prefix = property(_prefix_getter, _prefix_setter)
288
Martin v. Löwisef04c442008-03-19 05:04:44 +0000289 def set_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000290 """
291 Equivalent to 'node.children[i] = child'. This method also sets the
292 child's parent attribute appropriately.
293 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000294 child.parent = self
295 self.children[i].parent = None
296 self.children[i] = child
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000297 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000298
299 def insert_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000300 """
301 Equivalent to 'node.children.insert(i, child)'. This method also sets
302 the child's parent attribute appropriately.
303 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000304 child.parent = self
305 self.children.insert(i, child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000306 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000307
308 def append_child(self, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000309 """
310 Equivalent to 'node.children.append(child)'. This method also sets the
311 child's parent attribute appropriately.
312 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000313 child.parent = self
314 self.children.append(child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000315 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000316
317
318class Leaf(Base):
319
320 """Concrete implementation for leaf nodes."""
321
322 # Default values for instance variables
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000323 _prefix = "" # Whitespace and comments preceding this token in the input
324 lineno = 0 # Line where this token starts in the input
325 column = 0 # Column where this token tarts in the input
Martin v. Löwisef04c442008-03-19 05:04:44 +0000326
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000327 def __init__(self, type, value,
328 context=None,
329 prefix=None,
330 fixers_applied=[]):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000331 """
332 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000333
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000334 Takes a type constant (a token number < 256), a string value, and an
335 optional context keyword argument.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000336 """
337 assert 0 <= type < 256, type
338 if context is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000339 self._prefix, (self.lineno, self.column) = context
Martin v. Löwisef04c442008-03-19 05:04:44 +0000340 self.type = type
341 self.value = value
342 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000343 self._prefix = prefix
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000344 self.fixers_applied = fixers_applied[:]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000345
346 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000347 """Return a canonical string representation."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000348 return "%s(%r, %r)" % (self.__class__.__name__,
349 self.type,
350 self.value)
351
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000352 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000353 """
354 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000355
356 This reproduces the input source exactly.
357 """
358 return self.prefix + str(self.value)
359
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000360 if sys.version_info > (3, 0):
361 __str__ = __unicode__
362
Martin v. Löwisef04c442008-03-19 05:04:44 +0000363 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000364 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000365 return (self.type, self.value) == (other.type, other.value)
366
367 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000368 """Return a cloned (deep) copy of self."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000369 return Leaf(self.type, self.value,
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000370 (self.prefix, (self.lineno, self.column)),
371 fixers_applied=self.fixers_applied)
372
373 def leaves(self):
374 yield self
Martin v. Löwisef04c442008-03-19 05:04:44 +0000375
376 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000377 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000378 yield self
379
380 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000381 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000382 yield self
383
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000384 def _prefix_getter(self):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000385 """
386 The whitespace and comments preceding this token in the input.
387 """
388 return self._prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000389
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000390 def _prefix_setter(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000391 self.changed()
392 self._prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000393
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000394 prefix = property(_prefix_getter, _prefix_setter)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000395
396def convert(gr, raw_node):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000397 """
398 Convert raw node information to a Node or Leaf instance.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000399
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000400 This is passed to the parser driver which calls it whenever a reduction of a
401 grammar rule produces a new complete node, so that the tree is build
402 strictly bottom-up.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000403 """
404 type, value, context, children = raw_node
405 if children or type in gr.number2symbol:
406 # If there's exactly one child, return that child instead of
407 # creating a new node.
408 if len(children) == 1:
409 return children[0]
410 return Node(type, children, context=context)
411 else:
412 return Leaf(type, value, context=context)
413
414
415class BasePattern(object):
416
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000417 """
418 A pattern is a tree matching pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000419
420 It looks for a specific node type (token or symbol), and
421 optionally for a specific content.
422
423 This is an abstract base class. There are three concrete
424 subclasses:
425
426 - LeafPattern matches a single leaf node;
427 - NodePattern matches a single node (usually non-leaf);
428 - WildcardPattern matches a sequence of nodes of variable length.
429 """
430
431 # Defaults for instance variables
432 type = None # Node type (token if < 256, symbol if >= 256)
433 content = None # Optional content matching pattern
434 name = None # Optional name used to store match in results dict
435
436 def __new__(cls, *args, **kwds):
437 """Constructor that prevents BasePattern from being instantiated."""
438 assert cls is not BasePattern, "Cannot instantiate BasePattern"
439 return object.__new__(cls)
440
441 def __repr__(self):
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000442 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000443 while args and args[-1] is None:
444 del args[-1]
445 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
446
447 def optimize(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000448 """
449 A subclass can define this as a hook for optimizations.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000450
451 Returns either self or another node with the same effect.
452 """
453 return self
454
455 def match(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000456 """
457 Does this pattern exactly match a node?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000458
459 Returns True if it matches, False if not.
460
461 If results is not None, it must be a dict which will be
462 updated with the nodes matching named subpatterns.
463
464 Default implementation for non-wildcard patterns.
465 """
466 if self.type is not None and node.type != self.type:
467 return False
468 if self.content is not None:
469 r = None
470 if results is not None:
471 r = {}
472 if not self._submatch(node, r):
473 return False
474 if r:
475 results.update(r)
476 if results is not None and self.name:
477 results[self.name] = node
478 return True
479
480 def match_seq(self, nodes, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000481 """
482 Does this pattern exactly match a sequence of nodes?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000483
484 Default implementation for non-wildcard patterns.
485 """
486 if len(nodes) != 1:
487 return False
488 return self.match(nodes[0], results)
489
490 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000491 """
492 Generator yielding all matches for this pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000493
494 Default implementation for non-wildcard patterns.
495 """
496 r = {}
497 if nodes and self.match(nodes[0], r):
498 yield 1, r
499
500
501class LeafPattern(BasePattern):
502
503 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000504 """
505 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000506
507 The type, if given must be a token type (< 256). If not given,
508 this matches any *leaf* node; the content may still be required.
509
510 The content, if given, must be a string.
511
512 If a name is given, the matching node is stored in the results
513 dict under that key.
514 """
515 if type is not None:
516 assert 0 <= type < 256, type
517 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000518 assert isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000519 self.type = type
520 self.content = content
521 self.name = name
522
523 def match(self, node, results=None):
524 """Override match() to insist on a leaf node."""
525 if not isinstance(node, Leaf):
526 return False
527 return BasePattern.match(self, node, results)
528
529 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000530 """
531 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000532
533 This assumes the node type matches and self.content is not None.
534
535 Returns True if it matches, False if not.
536
537 If results is not None, it must be a dict which will be
538 updated with the nodes matching named subpatterns.
539
540 When returning False, the results dict may still be updated.
541 """
542 return self.content == node.value
543
544
545class NodePattern(BasePattern):
546
547 wildcards = False
548
549 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000550 """
551 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000552
553 The type, if given, must be a symbol type (>= 256). If the
554 type is None this matches *any* single node (leaf or not),
555 except if content is not None, in which it only matches
556 non-leaf nodes that also match the content pattern.
557
558 The content, if not None, must be a sequence of Patterns that
559 must match the node's children exactly. If the content is
560 given, the type must not be None.
561
562 If a name is given, the matching node is stored in the results
563 dict under that key.
564 """
565 if type is not None:
566 assert type >= 256, type
567 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000568 assert not isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000569 content = list(content)
570 for i, item in enumerate(content):
571 assert isinstance(item, BasePattern), (i, item)
572 if isinstance(item, WildcardPattern):
573 self.wildcards = True
574 self.type = type
575 self.content = content
576 self.name = name
577
578 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000579 """
580 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000581
582 This assumes the node type matches and self.content is not None.
583
584 Returns True if it matches, False if not.
585
586 If results is not None, it must be a dict which will be
587 updated with the nodes matching named subpatterns.
588
589 When returning False, the results dict may still be updated.
590 """
591 if self.wildcards:
592 for c, r in generate_matches(self.content, node.children):
593 if c == len(node.children):
594 if results is not None:
595 results.update(r)
596 return True
597 return False
598 if len(self.content) != len(node.children):
599 return False
600 for subpattern, child in zip(self.content, node.children):
601 if not subpattern.match(child, results):
602 return False
603 return True
604
605
606class WildcardPattern(BasePattern):
607
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000608 """
609 A wildcard pattern can match zero or more nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000610
611 This has all the flexibility needed to implement patterns like:
612
613 .* .+ .? .{m,n}
614 (a b c | d e | f)
615 (...)* (...)+ (...)? (...){m,n}
616
617 except it always uses non-greedy matching.
618 """
619
620 def __init__(self, content=None, min=0, max=HUGE, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000621 """
622 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000623
624 Args:
625 content: optional sequence of subsequences of patterns;
626 if absent, matches one node;
627 if present, each subsequence is an alternative [*]
Ezio Melotti13925002011-03-16 11:05:33 +0200628 min: optional minimum number of times to match, default 0
629 max: optional maximum number of times to match, default HUGE
Martin v. Löwisef04c442008-03-19 05:04:44 +0000630 name: optional name assigned to this match
631
632 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
633 equivalent to (a b c | d e | f g h); if content is None,
634 this is equivalent to '.' in regular expression terms.
635 The min and max parameters work as follows:
636 min=0, max=maxint: .*
637 min=1, max=maxint: .+
638 min=0, max=1: .?
639 min=1, max=1: .
640 If content is not None, replace the dot with the parenthesized
641 list of alternatives, e.g. (a b c | d e | f g h)*
642 """
643 assert 0 <= min <= max <= HUGE, (min, max)
644 if content is not None:
645 content = tuple(map(tuple, content)) # Protect against alterations
646 # Check sanity of alternatives
647 assert len(content), repr(content) # Can't have zero alternatives
648 for alt in content:
649 assert len(alt), repr(alt) # Can have empty alternatives
650 self.content = content
651 self.min = min
652 self.max = max
653 self.name = name
654
655 def optimize(self):
656 """Optimize certain stacked wildcard patterns."""
657 subpattern = None
658 if (self.content is not None and
659 len(self.content) == 1 and len(self.content[0]) == 1):
660 subpattern = self.content[0][0]
661 if self.min == 1 and self.max == 1:
662 if self.content is None:
663 return NodePattern(name=self.name)
664 if subpattern is not None and self.name == subpattern.name:
665 return subpattern.optimize()
666 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
667 subpattern.min <= 1 and self.name == subpattern.name):
668 return WildcardPattern(subpattern.content,
669 self.min*subpattern.min,
670 self.max*subpattern.max,
671 subpattern.name)
672 return self
673
674 def match(self, node, results=None):
675 """Does this pattern exactly match a node?"""
676 return self.match_seq([node], results)
677
678 def match_seq(self, nodes, results=None):
679 """Does this pattern exactly match a sequence of nodes?"""
680 for c, r in self.generate_matches(nodes):
681 if c == len(nodes):
682 if results is not None:
683 results.update(r)
684 if self.name:
685 results[self.name] = list(nodes)
686 return True
687 return False
688
689 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000690 """
691 Generator yielding matches for a sequence of nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000692
693 Args:
694 nodes: sequence of nodes
695
696 Yields:
697 (count, results) tuples where:
698 count: the match comprises nodes[:count];
699 results: dict containing named submatches.
700 """
701 if self.content is None:
702 # Shortcut for special case (see __init__.__doc__)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000703 for count in range(self.min, 1 + min(len(nodes), self.max)):
Martin v. Löwisef04c442008-03-19 05:04:44 +0000704 r = {}
705 if self.name:
706 r[self.name] = nodes[:count]
707 yield count, r
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000708 elif self.name == "bare_name":
709 yield self._bare_name_matches(nodes)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000710 else:
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000711 # The reason for this is that hitting the recursion limit usually
712 # results in some ugly messages about how RuntimeErrors are being
Benjamin Petersond86e4c82011-03-06 17:14:30 -0600713 # ignored. We only have to do this on CPython, though, because other
714 # implementations don't have this nasty bug in the first place.
715 if hasattr(sys, "getrefcount"):
716 save_stderr = sys.stderr
717 sys.stderr = StringIO()
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000718 try:
719 for count, r in self._recursive_matches(nodes, 0):
720 if self.name:
721 r[self.name] = nodes[:count]
722 yield count, r
723 except RuntimeError:
724 # We fall back to the iterative pattern matching scheme if the recursive
725 # scheme hits the recursion limit.
726 for count, r in self._iterative_matches(nodes):
727 if self.name:
728 r[self.name] = nodes[:count]
729 yield count, r
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000730 finally:
Benjamin Petersond86e4c82011-03-06 17:14:30 -0600731 if hasattr(sys, "getrefcount"):
732 sys.stderr = save_stderr
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000733
734 def _iterative_matches(self, nodes):
735 """Helper to iteratively yield the matches."""
736 nodelen = len(nodes)
737 if 0 >= self.min:
738 yield 0, {}
739
740 results = []
741 # generate matches that use just one alt from self.content
742 for alt in self.content:
743 for c, r in generate_matches(alt, nodes):
744 yield c, r
745 results.append((c, r))
746
747 # for each match, iterate down the nodes
748 while results:
749 new_results = []
750 for c0, r0 in results:
751 # stop if the entire set of nodes has been matched
752 if c0 < nodelen and c0 <= self.max:
753 for alt in self.content:
754 for c1, r1 in generate_matches(alt, nodes[c0:]):
755 if c1 > 0:
756 r = {}
757 r.update(r0)
758 r.update(r1)
759 yield c0 + c1, r
760 new_results.append((c0 + c1, r))
761 results = new_results
Martin v. Löwisef04c442008-03-19 05:04:44 +0000762
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000763 def _bare_name_matches(self, nodes):
764 """Special optimized matcher for bare_name."""
765 count = 0
766 r = {}
767 done = False
768 max = len(nodes)
769 while not done and count < max:
770 done = True
771 for leaf in self.content:
772 if leaf[0].match(nodes[count], r):
773 count += 1
774 done = False
775 break
776 r[self.name] = nodes[:count]
777 return count, r
778
Martin v. Löwisef04c442008-03-19 05:04:44 +0000779 def _recursive_matches(self, nodes, count):
780 """Helper to recursively yield the matches."""
781 assert self.content is not None
782 if count >= self.min:
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000783 yield 0, {}
Martin v. Löwisef04c442008-03-19 05:04:44 +0000784 if count < self.max:
785 for alt in self.content:
786 for c0, r0 in generate_matches(alt, nodes):
787 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
788 r = {}
789 r.update(r0)
790 r.update(r1)
791 yield c0 + c1, r
792
793
794class NegatedPattern(BasePattern):
795
796 def __init__(self, content=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000797 """
798 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000799
800 The argument is either a pattern or None. If it is None, this
801 only matches an empty sequence (effectively '$' in regex
802 lingo). If it is not None, this matches whenever the argument
803 pattern doesn't have any matches.
804 """
805 if content is not None:
806 assert isinstance(content, BasePattern), repr(content)
807 self.content = content
808
809 def match(self, node):
810 # We never match a node in its entirety
811 return False
812
813 def match_seq(self, nodes):
814 # We only match an empty sequence of nodes in its entirety
815 return len(nodes) == 0
816
817 def generate_matches(self, nodes):
818 if self.content is None:
819 # Return a match if there is an empty sequence
820 if len(nodes) == 0:
821 yield 0, {}
822 else:
823 # Return a match if the argument pattern has no matches
824 for c, r in self.content.generate_matches(nodes):
825 return
826 yield 0, {}
827
828
829def generate_matches(patterns, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000830 """
831 Generator yielding matches for a sequence of patterns and nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000832
833 Args:
834 patterns: a sequence of patterns
835 nodes: a sequence of nodes
836
837 Yields:
838 (count, results) tuples where:
839 count: the entire sequence of patterns matches nodes[:count];
840 results: dict containing named submatches.
841 """
842 if not patterns:
843 yield 0, {}
844 else:
845 p, rest = patterns[0], patterns[1:]
846 for c0, r0 in p.generate_matches(nodes):
847 if not rest:
848 yield c0, r0
849 else:
850 for c1, r1 in generate_matches(rest, nodes[c0:]):
851 r = {}
852 r.update(r0)
853 r.update(r1)
854 yield c0 + c1, r