blob: d3457335964adfc4478b3f81570e899e9fa94a39 [file] [log] [blame]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Python parse tree definitions.
5
6This is a very concrete parse tree; we need to keep every token and
7even the comments and whitespace between tokens.
8
9There's also a pattern matching implementation here.
10"""
11
12__author__ = "Guido van Rossum <guido@python.org>"
13
Benjamin Peterson2555d9d2008-11-28 22:12:14 +000014import sys
15from StringIO import StringIO
16
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000017
18HUGE = 0x7FFFFFFF # maximum repeat count, default max
19
Martin v. Löwisbaf267c2008-03-22 00:01:12 +000020_type_reprs = {}
21def type_repr(type_num):
22 global _type_reprs
23 if not _type_reprs:
24 from .pygram import python_symbols
25 # printing tokens is possible but not as useful
26 # from .pgen2 import token // token.__dict__.items():
27 for name, val in python_symbols.__dict__.items():
28 if type(val) == int: _type_reprs[val] = name
29 return _type_reprs.setdefault(type_num, type_num)
30
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000031
32class Base(object):
33
34 """Abstract base class for Node and Leaf.
35
36 This provides some default functionality and boilerplate using the
37 template pattern.
38
39 A node may be a subnode of at most one parent.
40 """
41
42 # Default values for instance variables
43 type = None # int: token number (< 256) or symbol number (>= 256)
44 parent = None # Parent node pointer, or None
45 children = () # Tuple of subnodes
46 was_changed = False
47
48 def __new__(cls, *args, **kwds):
49 """Constructor that prevents Base from being instantiated."""
50 assert cls is not Base, "Cannot instantiate Base"
51 return object.__new__(cls)
52
53 def __eq__(self, other):
54 """Compares two nodes for equality.
55
56 This calls the method _eq().
57 """
58 if self.__class__ is not other.__class__:
59 return NotImplemented
60 return self._eq(other)
61
62 def __ne__(self, other):
63 """Compares two nodes for inequality.
64
65 This calls the method _eq().
66 """
67 if self.__class__ is not other.__class__:
68 return NotImplemented
69 return not self._eq(other)
70
71 def _eq(self, other):
72 """Compares two nodes for equality.
73
74 This is called by __eq__ and __ne__. It is only called if the
75 two nodes have the same type. This must be implemented by the
76 concrete subclass. Nodes should be considered equal if they
77 have the same structure, ignoring the prefix string and other
78 context information.
79 """
80 raise NotImplementedError
81
82 def clone(self):
83 """Returns a cloned (deep) copy of self.
84
85 This must be implemented by the concrete subclass.
86 """
87 raise NotImplementedError
88
89 def post_order(self):
90 """Returns a post-order iterator for the tree.
91
92 This must be implemented by the concrete subclass.
93 """
94 raise NotImplementedError
95
96 def pre_order(self):
97 """Returns a pre-order iterator for the tree.
98
99 This must be implemented by the concrete subclass.
100 """
101 raise NotImplementedError
102
103 def set_prefix(self, prefix):
104 """Sets the prefix for the node (see Leaf class).
105
106 This must be implemented by the concrete subclass.
107 """
108 raise NotImplementedError
109
110 def get_prefix(self):
111 """Returns the prefix for the node (see Leaf class).
112
113 This must be implemented by the concrete subclass.
114 """
115 raise NotImplementedError
116
117 def replace(self, new):
118 """Replaces this node with a new one in the parent."""
119 assert self.parent is not None, str(self)
120 assert new is not None
121 if not isinstance(new, list):
122 new = [new]
123 l_children = []
124 found = False
125 for ch in self.parent.children:
126 if ch is self:
127 assert not found, (self.parent.children, self, new)
128 if new is not None:
129 l_children.extend(new)
130 found = True
131 else:
132 l_children.append(ch)
133 assert found, (self.children, self, new)
134 self.parent.changed()
135 self.parent.children = l_children
136 for x in new:
137 x.parent = self.parent
138 self.parent = None
139
140 def get_lineno(self):
141 """Returns the line number which generated the invocant node."""
142 node = self
143 while not isinstance(node, Leaf):
144 if not node.children:
145 return
146 node = node.children[0]
147 return node.lineno
148
149 def changed(self):
150 if self.parent:
151 self.parent.changed()
152 self.was_changed = True
153
154 def remove(self):
155 """Remove the node from the tree. Returns the position of the node
156 in its parent's children before it was removed."""
157 if self.parent:
158 for i, node in enumerate(self.parent.children):
159 if node is self:
160 self.parent.changed()
161 del self.parent.children[i]
162 self.parent = None
163 return i
164
165 def get_next_sibling(self):
166 """Return the node immediately following the invocant in their
167 parent's children list. If the invocant does not have a next
168 sibling, return None."""
169 if self.parent is None:
170 return None
171
172 # Can't use index(); we need to test by identity
Martin v. Löwis60a819d2008-04-10 02:48:01 +0000173 for i, child in enumerate(self.parent.children):
174 if child is self:
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000175 try:
176 return self.parent.children[i+1]
177 except IndexError:
178 return None
179
Martin v. Löwis60a819d2008-04-10 02:48:01 +0000180 def get_prev_sibling(self):
181 """Return the node immediately preceding the invocant in their
182 parent's children list. If the invocant does not have a previous
183 sibling, return None."""
184 if self.parent is None:
185 return None
186
187 # Can't use index(); we need to test by identity
188 for i, child in enumerate(self.parent.children):
189 if child is self:
190 if i == 0:
191 return None
192 return self.parent.children[i-1]
193
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000194 def get_suffix(self):
195 """Return the string immediately following the invocant node. This
196 is effectively equivalent to node.get_next_sibling().get_prefix()"""
197 next_sib = self.get_next_sibling()
198 if next_sib is None:
199 return ""
200 return next_sib.get_prefix()
201
202
203class Node(Base):
204
205 """Concrete implementation for interior nodes."""
206
207 def __init__(self, type, children, context=None, prefix=None):
208 """Initializer.
209
210 Takes a type constant (a symbol number >= 256), a sequence of
211 child nodes, and an optional context keyword argument.
212
213 As a side effect, the parent pointers of the children are updated.
214 """
215 assert type >= 256, type
216 self.type = type
217 self.children = list(children)
218 for ch in self.children:
219 assert ch.parent is None, repr(ch)
220 ch.parent = self
221 if prefix is not None:
222 self.set_prefix(prefix)
223
224 def __repr__(self):
225 """Returns a canonical string representation."""
Martin v. Löwisbaf267c2008-03-22 00:01:12 +0000226 return "%s(%s, %r)" % (self.__class__.__name__,
227 type_repr(self.type),
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000228 self.children)
229
230 def __str__(self):
231 """Returns a pretty string representation.
232
233 This reproduces the input source exactly.
234 """
235 return "".join(map(str, self.children))
236
237 def _eq(self, other):
238 """Compares two nodes for equality."""
239 return (self.type, self.children) == (other.type, other.children)
240
241 def clone(self):
242 """Returns a cloned (deep) copy of self."""
243 return Node(self.type, [ch.clone() for ch in self.children])
244
245 def post_order(self):
246 """Returns a post-order iterator for the tree."""
247 for child in self.children:
248 for node in child.post_order():
249 yield node
250 yield self
251
252 def pre_order(self):
253 """Returns a pre-order iterator for the tree."""
254 yield self
255 for child in self.children:
256 for node in child.post_order():
257 yield node
258
259 def set_prefix(self, prefix):
260 """Sets the prefix for the node.
261
262 This passes the responsibility on to the first child.
263 """
264 if self.children:
265 self.children[0].set_prefix(prefix)
266
267 def get_prefix(self):
268 """Returns the prefix for the node.
269
270 This passes the call on to the first child.
271 """
272 if not self.children:
273 return ""
274 return self.children[0].get_prefix()
275
276 def set_child(self, i, child):
277 """Equivalent to 'node.children[i] = child'. This method also sets the
278 child's parent attribute appropriately."""
279 child.parent = self
280 self.children[i].parent = None
281 self.children[i] = child
282
283 def insert_child(self, i, child):
284 """Equivalent to 'node.children.insert(i, child)'. This method also
285 sets the child's parent attribute appropriately."""
286 child.parent = self
287 self.children.insert(i, child)
288
289 def append_child(self, child):
290 """Equivalent to 'node.children.append(child)'. This method also
291 sets the child's parent attribute appropriately."""
292 child.parent = self
293 self.children.append(child)
294
295
296class Leaf(Base):
297
298 """Concrete implementation for leaf nodes."""
299
300 # Default values for instance variables
301 prefix = "" # Whitespace and comments preceding this token in the input
302 lineno = 0 # Line where this token starts in the input
303 column = 0 # Column where this token tarts in the input
304
305 def __init__(self, type, value, context=None, prefix=None):
306 """Initializer.
307
308 Takes a type constant (a token number < 256), a string value,
309 and an optional context keyword argument.
310 """
311 assert 0 <= type < 256, type
312 if context is not None:
313 self.prefix, (self.lineno, self.column) = context
314 self.type = type
315 self.value = value
316 if prefix is not None:
317 self.prefix = prefix
318
319 def __repr__(self):
320 """Returns a canonical string representation."""
321 return "%s(%r, %r)" % (self.__class__.__name__,
322 self.type,
323 self.value)
324
325 def __str__(self):
326 """Returns a pretty string representation.
327
328 This reproduces the input source exactly.
329 """
330 return self.prefix + str(self.value)
331
332 def _eq(self, other):
333 """Compares two nodes for equality."""
334 return (self.type, self.value) == (other.type, other.value)
335
336 def clone(self):
337 """Returns a cloned (deep) copy of self."""
338 return Leaf(self.type, self.value,
339 (self.prefix, (self.lineno, self.column)))
340
341 def post_order(self):
342 """Returns a post-order iterator for the tree."""
343 yield self
344
345 def pre_order(self):
346 """Returns a pre-order iterator for the tree."""
347 yield self
348
349 def set_prefix(self, prefix):
350 """Sets the prefix for the node."""
351 self.changed()
352 self.prefix = prefix
353
354 def get_prefix(self):
355 """Returns the prefix for the node."""
356 return self.prefix
357
358
359def convert(gr, raw_node):
360 """Converts raw node information to a Node or Leaf instance.
361
362 This is passed to the parser driver which calls it whenever a
363 reduction of a grammar rule produces a new complete node, so that
364 the tree is build strictly bottom-up.
365 """
366 type, value, context, children = raw_node
367 if children or type in gr.number2symbol:
368 # If there's exactly one child, return that child instead of
369 # creating a new node.
370 if len(children) == 1:
371 return children[0]
372 return Node(type, children, context=context)
373 else:
374 return Leaf(type, value, context=context)
375
376
377class BasePattern(object):
378
379 """A pattern is a tree matching pattern.
380
381 It looks for a specific node type (token or symbol), and
382 optionally for a specific content.
383
384 This is an abstract base class. There are three concrete
385 subclasses:
386
387 - LeafPattern matches a single leaf node;
388 - NodePattern matches a single node (usually non-leaf);
389 - WildcardPattern matches a sequence of nodes of variable length.
390 """
391
392 # Defaults for instance variables
393 type = None # Node type (token if < 256, symbol if >= 256)
394 content = None # Optional content matching pattern
395 name = None # Optional name used to store match in results dict
396
397 def __new__(cls, *args, **kwds):
398 """Constructor that prevents BasePattern from being instantiated."""
399 assert cls is not BasePattern, "Cannot instantiate BasePattern"
400 return object.__new__(cls)
401
402 def __repr__(self):
Martin v. Löwisbaf267c2008-03-22 00:01:12 +0000403 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000404 while args and args[-1] is None:
405 del args[-1]
406 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
407
408 def optimize(self):
409 """A subclass can define this as a hook for optimizations.
410
411 Returns either self or another node with the same effect.
412 """
413 return self
414
415 def match(self, node, results=None):
416 """Does this pattern exactly match a node?
417
418 Returns True if it matches, False if not.
419
420 If results is not None, it must be a dict which will be
421 updated with the nodes matching named subpatterns.
422
423 Default implementation for non-wildcard patterns.
424 """
425 if self.type is not None and node.type != self.type:
426 return False
427 if self.content is not None:
428 r = None
429 if results is not None:
430 r = {}
431 if not self._submatch(node, r):
432 return False
433 if r:
434 results.update(r)
435 if results is not None and self.name:
436 results[self.name] = node
437 return True
438
439 def match_seq(self, nodes, results=None):
440 """Does this pattern exactly match a sequence of nodes?
441
442 Default implementation for non-wildcard patterns.
443 """
444 if len(nodes) != 1:
445 return False
446 return self.match(nodes[0], results)
447
448 def generate_matches(self, nodes):
449 """Generator yielding all matches for this pattern.
450
451 Default implementation for non-wildcard patterns.
452 """
453 r = {}
454 if nodes and self.match(nodes[0], r):
455 yield 1, r
456
457
458class LeafPattern(BasePattern):
459
460 def __init__(self, type=None, content=None, name=None):
461 """Initializer. Takes optional type, content, and name.
462
463 The type, if given must be a token type (< 256). If not given,
464 this matches any *leaf* node; the content may still be required.
465
466 The content, if given, must be a string.
467
468 If a name is given, the matching node is stored in the results
469 dict under that key.
470 """
471 if type is not None:
472 assert 0 <= type < 256, type
473 if content is not None:
474 assert isinstance(content, basestring), repr(content)
475 self.type = type
476 self.content = content
477 self.name = name
478
479 def match(self, node, results=None):
480 """Override match() to insist on a leaf node."""
481 if not isinstance(node, Leaf):
482 return False
483 return BasePattern.match(self, node, results)
484
485 def _submatch(self, node, results=None):
486 """Match the pattern's content to the node's children.
487
488 This assumes the node type matches and self.content is not None.
489
490 Returns True if it matches, False if not.
491
492 If results is not None, it must be a dict which will be
493 updated with the nodes matching named subpatterns.
494
495 When returning False, the results dict may still be updated.
496 """
497 return self.content == node.value
498
499
500class NodePattern(BasePattern):
501
502 wildcards = False
503
504 def __init__(self, type=None, content=None, name=None):
505 """Initializer. Takes optional type, content, and name.
506
507 The type, if given, must be a symbol type (>= 256). If the
508 type is None this matches *any* single node (leaf or not),
509 except if content is not None, in which it only matches
510 non-leaf nodes that also match the content pattern.
511
512 The content, if not None, must be a sequence of Patterns that
513 must match the node's children exactly. If the content is
514 given, the type must not be None.
515
516 If a name is given, the matching node is stored in the results
517 dict under that key.
518 """
519 if type is not None:
520 assert type >= 256, type
521 if content is not None:
522 assert not isinstance(content, basestring), repr(content)
523 content = list(content)
524 for i, item in enumerate(content):
525 assert isinstance(item, BasePattern), (i, item)
526 if isinstance(item, WildcardPattern):
527 self.wildcards = True
528 self.type = type
529 self.content = content
530 self.name = name
531
532 def _submatch(self, node, results=None):
533 """Match the pattern's content to the node's children.
534
535 This assumes the node type matches and self.content is not None.
536
537 Returns True if it matches, False if not.
538
539 If results is not None, it must be a dict which will be
540 updated with the nodes matching named subpatterns.
541
542 When returning False, the results dict may still be updated.
543 """
544 if self.wildcards:
545 for c, r in generate_matches(self.content, node.children):
546 if c == len(node.children):
547 if results is not None:
548 results.update(r)
549 return True
550 return False
551 if len(self.content) != len(node.children):
552 return False
553 for subpattern, child in zip(self.content, node.children):
554 if not subpattern.match(child, results):
555 return False
556 return True
557
558
559class WildcardPattern(BasePattern):
560
561 """A wildcard pattern can match zero or more nodes.
562
563 This has all the flexibility needed to implement patterns like:
564
565 .* .+ .? .{m,n}
566 (a b c | d e | f)
567 (...)* (...)+ (...)? (...){m,n}
568
569 except it always uses non-greedy matching.
570 """
571
572 def __init__(self, content=None, min=0, max=HUGE, name=None):
573 """Initializer.
574
575 Args:
576 content: optional sequence of subsequences of patterns;
577 if absent, matches one node;
578 if present, each subsequence is an alternative [*]
579 min: optinal minumum number of times to match, default 0
580 max: optional maximum number of times tro match, default HUGE
581 name: optional name assigned to this match
582
583 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
584 equivalent to (a b c | d e | f g h); if content is None,
585 this is equivalent to '.' in regular expression terms.
586 The min and max parameters work as follows:
587 min=0, max=maxint: .*
588 min=1, max=maxint: .+
589 min=0, max=1: .?
590 min=1, max=1: .
591 If content is not None, replace the dot with the parenthesized
592 list of alternatives, e.g. (a b c | d e | f g h)*
593 """
594 assert 0 <= min <= max <= HUGE, (min, max)
595 if content is not None:
596 content = tuple(map(tuple, content)) # Protect against alterations
597 # Check sanity of alternatives
598 assert len(content), repr(content) # Can't have zero alternatives
599 for alt in content:
600 assert len(alt), repr(alt) # Can have empty alternatives
601 self.content = content
602 self.min = min
603 self.max = max
604 self.name = name
605
606 def optimize(self):
607 """Optimize certain stacked wildcard patterns."""
608 subpattern = None
609 if (self.content is not None and
610 len(self.content) == 1 and len(self.content[0]) == 1):
611 subpattern = self.content[0][0]
612 if self.min == 1 and self.max == 1:
613 if self.content is None:
614 return NodePattern(name=self.name)
615 if subpattern is not None and self.name == subpattern.name:
616 return subpattern.optimize()
617 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
618 subpattern.min <= 1 and self.name == subpattern.name):
619 return WildcardPattern(subpattern.content,
620 self.min*subpattern.min,
621 self.max*subpattern.max,
622 subpattern.name)
623 return self
624
625 def match(self, node, results=None):
626 """Does this pattern exactly match a node?"""
627 return self.match_seq([node], results)
628
629 def match_seq(self, nodes, results=None):
630 """Does this pattern exactly match a sequence of nodes?"""
631 for c, r in self.generate_matches(nodes):
632 if c == len(nodes):
633 if results is not None:
634 results.update(r)
635 if self.name:
636 results[self.name] = list(nodes)
637 return True
638 return False
639
640 def generate_matches(self, nodes):
641 """Generator yielding matches for a sequence of nodes.
642
643 Args:
644 nodes: sequence of nodes
645
646 Yields:
647 (count, results) tuples where:
648 count: the match comprises nodes[:count];
649 results: dict containing named submatches.
650 """
651 if self.content is None:
652 # Shortcut for special case (see __init__.__doc__)
653 for count in xrange(self.min, 1 + min(len(nodes), self.max)):
654 r = {}
655 if self.name:
656 r[self.name] = nodes[:count]
657 yield count, r
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000658 elif self.name == "bare_name":
659 yield self._bare_name_matches(nodes)
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000660 else:
Benjamin Peterson2555d9d2008-11-28 22:12:14 +0000661 # The reason for this is that hitting the recursion limit usually
662 # results in some ugly messages about how RuntimeErrors are being
663 # ignored.
664 save_stderr = sys.stderr
665 sys.stderr = StringIO()
Benjamin Peterson4d164152008-10-04 20:55:50 +0000666 try:
667 for count, r in self._recursive_matches(nodes, 0):
668 if self.name:
669 r[self.name] = nodes[:count]
670 yield count, r
671 except RuntimeError:
672 # We fall back to the iterative pattern matching scheme if the recursive
673 # scheme hits the recursion limit.
674 for count, r in self._iterative_matches(nodes):
675 if self.name:
676 r[self.name] = nodes[:count]
677 yield count, r
Benjamin Peterson2555d9d2008-11-28 22:12:14 +0000678 finally:
679 sys.stderr = save_stderr
Benjamin Peterson4d164152008-10-04 20:55:50 +0000680
681 def _iterative_matches(self, nodes):
682 """Helper to iteratively yield the matches."""
683 nodelen = len(nodes)
684 if 0 >= self.min:
685 yield 0, {}
686
687 results = []
688 # generate matches that use just one alt from self.content
689 for alt in self.content:
690 for c, r in generate_matches(alt, nodes):
691 yield c, r
692 results.append((c, r))
693
694 # for each match, iterate down the nodes
695 while results:
696 new_results = []
697 for c0, r0 in results:
698 # stop if the entire set of nodes has been matched
699 if c0 < nodelen and c0 <= self.max:
700 for alt in self.content:
701 for c1, r1 in generate_matches(alt, nodes[c0:]):
702 if c1 > 0:
703 r = {}
704 r.update(r0)
705 r.update(r1)
706 yield c0 + c1, r
707 new_results.append((c0 + c1, r))
708 results = new_results
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000709
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000710 def _bare_name_matches(self, nodes):
711 """Special optimized matcher for bare_name."""
712 count = 0
713 r = {}
714 done = False
715 max = len(nodes)
716 while not done and count < max:
717 done = True
718 for leaf in self.content:
719 if leaf[0].match(nodes[count], r):
720 count += 1
721 done = False
722 break
723 r[self.name] = nodes[:count]
724 return count, r
725
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000726 def _recursive_matches(self, nodes, count):
727 """Helper to recursively yield the matches."""
728 assert self.content is not None
729 if count >= self.min:
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000730 yield 0, {}
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000731 if count < self.max:
732 for alt in self.content:
733 for c0, r0 in generate_matches(alt, nodes):
734 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
735 r = {}
736 r.update(r0)
737 r.update(r1)
738 yield c0 + c1, r
739
740
741class NegatedPattern(BasePattern):
742
743 def __init__(self, content=None):
744 """Initializer.
745
746 The argument is either a pattern or None. If it is None, this
747 only matches an empty sequence (effectively '$' in regex
748 lingo). If it is not None, this matches whenever the argument
749 pattern doesn't have any matches.
750 """
751 if content is not None:
752 assert isinstance(content, BasePattern), repr(content)
753 self.content = content
754
755 def match(self, node):
756 # We never match a node in its entirety
757 return False
758
759 def match_seq(self, nodes):
760 # We only match an empty sequence of nodes in its entirety
761 return len(nodes) == 0
762
763 def generate_matches(self, nodes):
764 if self.content is None:
765 # Return a match if there is an empty sequence
766 if len(nodes) == 0:
767 yield 0, {}
768 else:
769 # Return a match if the argument pattern has no matches
770 for c, r in self.content.generate_matches(nodes):
771 return
772 yield 0, {}
773
774
775def generate_matches(patterns, nodes):
776 """Generator yielding matches for a sequence of patterns and nodes.
777
778 Args:
779 patterns: a sequence of patterns
780 nodes: a sequence of nodes
781
782 Yields:
783 (count, results) tuples where:
784 count: the entire sequence of patterns matches nodes[:count];
785 results: dict containing named submatches.
786 """
787 if not patterns:
788 yield 0, {}
789 else:
790 p, rest = patterns[0], patterns[1:]
791 for c0, r0 in p.generate_matches(nodes):
792 if not rest:
793 yield c0, r0
794 else:
795 for c1, r1 in generate_matches(rest, nodes[c0:]):
796 r = {}
797 r.update(r0)
798 r.update(r1)
799 yield c0 + c1, r