blob: 6245c8c520d0cea78c7e8cddc937ccd329bb0987 [file] [log] [blame]
Benjamin Peterson18532e52010-02-06 02:40:03 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""
5Python parse tree definitions.
6
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
15import sys
16import warnings
17from StringIO import StringIO
18
19
20HUGE = 0x7FFFFFFF # maximum repeat count, default max
21
22_type_reprs = {}
23def type_repr(type_num):
24 global _type_reprs
25 if not _type_reprs:
26 from .pygram import python_symbols
27 # printing tokens is possible but not as useful
28 # from .pgen2 import token // token.__dict__.items():
29 for name, val in python_symbols.__dict__.items():
30 if type(val) == int: _type_reprs[val] = name
31 return _type_reprs.setdefault(type_num, type_num)
32
33
34class Base(object):
35
36 """
37 Abstract base class for Node and Leaf.
38
39 This provides some default functionality and boilerplate using the
40 template pattern.
41
42 A node may be a subnode of at most one parent.
43 """
44
45 # Default values for instance variables
46 type = None # int: token number (< 256) or symbol number (>= 256)
47 parent = None # Parent node pointer, or None
48 children = () # Tuple of subnodes
49 was_changed = False
50
51 def __new__(cls, *args, **kwds):
52 """Constructor that prevents Base from being instantiated."""
53 assert cls is not Base, "Cannot instantiate Base"
54 return object.__new__(cls)
55
56 def __eq__(self, other):
57 """
58 Compare two nodes for equality.
59
60 This calls the method _eq().
61 """
62 if self.__class__ is not other.__class__:
63 return NotImplemented
64 return self._eq(other)
65
66 __hash__ = None # For Py3 compatibility.
67
68 def __ne__(self, other):
69 """
70 Compare two nodes for inequality.
71
72 This calls the method _eq().
73 """
74 if self.__class__ is not other.__class__:
75 return NotImplemented
76 return not self._eq(other)
77
78 def _eq(self, other):
79 """
80 Compare two nodes for equality.
81
82 This is called by __eq__ and __ne__. It is only called if the two nodes
83 have the same type. This must be implemented by the concrete subclass.
84 Nodes should be considered equal if they have the same structure,
85 ignoring the prefix string and other context information.
86 """
87 raise NotImplementedError
88
89 def clone(self):
90 """
91 Return a cloned (deep) copy of self.
92
93 This must be implemented by the concrete subclass.
94 """
95 raise NotImplementedError
96
97 def post_order(self):
98 """
99 Return a post-order iterator for the tree.
100
101 This must be implemented by the concrete subclass.
102 """
103 raise NotImplementedError
104
105 def pre_order(self):
106 """
107 Return a pre-order iterator for the tree.
108
109 This must be implemented by the concrete subclass.
110 """
111 raise NotImplementedError
112
113 def set_prefix(self, prefix):
114 """
115 Set the prefix for the node (see Leaf class).
116
117 DEPRECATED; use the prefix property directly.
118 """
119 warnings.warn("set_prefix() is deprecated; use the prefix property",
120 DeprecationWarning, stacklevel=2)
121 self.prefix = prefix
122
123 def get_prefix(self):
124 """
125 Return the prefix for the node (see Leaf class).
126
127 DEPRECATED; use the prefix property directly.
128 """
129 warnings.warn("get_prefix() is deprecated; use the prefix property",
130 DeprecationWarning, stacklevel=2)
131 return self.prefix
132
133 def replace(self, new):
134 """Replace this node with a new one in the parent."""
135 assert self.parent is not None, str(self)
136 assert new is not None
137 if not isinstance(new, list):
138 new = [new]
139 l_children = []
140 found = False
141 for ch in self.parent.children:
142 if ch is self:
143 assert not found, (self.parent.children, self, new)
144 if new is not None:
145 l_children.extend(new)
146 found = True
147 else:
148 l_children.append(ch)
149 assert found, (self.children, self, new)
150 self.parent.changed()
151 self.parent.children = l_children
152 for x in new:
153 x.parent = self.parent
154 self.parent = None
155
156 def get_lineno(self):
157 """Return the line number which generated the invocant node."""
158 node = self
159 while not isinstance(node, Leaf):
160 if not node.children:
161 return
162 node = node.children[0]
163 return node.lineno
164
165 def changed(self):
166 if self.parent:
167 self.parent.changed()
168 self.was_changed = True
169
170 def remove(self):
171 """
172 Remove the node from the tree. Returns the position of the node in its
173 parent's children before it was removed.
174 """
175 if self.parent:
176 for i, node in enumerate(self.parent.children):
177 if node is self:
178 self.parent.changed()
179 del self.parent.children[i]
180 self.parent = None
181 return i
182
183 @property
184 def next_sibling(self):
185 """
186 The node immediately following the invocant in their parent's children
187 list. If the invocant does not have a next sibling, it is None
188 """
189 if self.parent is None:
190 return None
191
192 # Can't use index(); we need to test by identity
193 for i, child in enumerate(self.parent.children):
194 if child is self:
195 try:
196 return self.parent.children[i+1]
197 except IndexError:
198 return None
199
200 @property
201 def prev_sibling(self):
202 """
203 The node immediately preceding the invocant in their parent's children
204 list. If the invocant does not have a previous sibling, it is None.
205 """
206 if self.parent is None:
207 return None
208
209 # Can't use index(); we need to test by identity
210 for i, child in enumerate(self.parent.children):
211 if child is self:
212 if i == 0:
213 return None
214 return self.parent.children[i-1]
215
216 def get_suffix(self):
217 """
218 Return the string immediately following the invocant node. This is
219 effectively equivalent to node.next_sibling.prefix
220 """
221 next_sib = self.next_sibling
222 if next_sib is None:
223 return u""
224 return next_sib.prefix
225
226 if sys.version_info < (3, 0):
227 def __str__(self):
228 return unicode(self).encode("ascii")
229
230
231class Node(Base):
232
233 """Concrete implementation for interior nodes."""
234
235 def __init__(self, type, children, context=None, prefix=None):
236 """
237 Initializer.
238
239 Takes a type constant (a symbol number >= 256), a sequence of
240 child nodes, and an optional context keyword argument.
241
242 As a side effect, the parent pointers of the children are updated.
243 """
244 assert type >= 256, type
245 self.type = type
246 self.children = list(children)
247 for ch in self.children:
248 assert ch.parent is None, repr(ch)
249 ch.parent = self
250 if prefix is not None:
251 self.prefix = prefix
252
253 def __repr__(self):
254 """Return a canonical string representation."""
255 return "%s(%s, %r)" % (self.__class__.__name__,
256 type_repr(self.type),
257 self.children)
258
259 def __unicode__(self):
260 """
261 Return a pretty string representation.
262
263 This reproduces the input source exactly.
264 """
265 return u"".join(map(unicode, self.children))
266
267 if sys.version_info > (3, 0):
268 __str__ = __unicode__
269
270 def _eq(self, other):
271 """Compare two nodes for equality."""
272 return (self.type, self.children) == (other.type, other.children)
273
274 def clone(self):
275 """Return a cloned (deep) copy of self."""
276 return Node(self.type, [ch.clone() for ch in self.children])
277
278 def post_order(self):
279 """Return a post-order iterator for the tree."""
280 for child in self.children:
281 for node in child.post_order():
282 yield node
283 yield self
284
285 def pre_order(self):
286 """Return a pre-order iterator for the tree."""
287 yield self
288 for child in self.children:
289 for node in child.post_order():
290 yield node
291
Benjamin Peterson6d54b012010-05-07 19:06:32 +0000292 def _prefix_getter(self):
Benjamin Peterson18532e52010-02-06 02:40:03 +0000293 """
294 The whitespace and comments preceding this node in the input.
295 """
296 if not self.children:
297 return ""
298 return self.children[0].prefix
299
Benjamin Peterson6d54b012010-05-07 19:06:32 +0000300 def _prefix_setter(self, prefix):
Benjamin Peterson18532e52010-02-06 02:40:03 +0000301 if self.children:
302 self.children[0].prefix = prefix
303
Benjamin Peterson6d54b012010-05-07 19:06:32 +0000304 prefix = property(_prefix_getter, _prefix_setter)
305
Benjamin Peterson18532e52010-02-06 02:40:03 +0000306 def set_child(self, i, child):
307 """
308 Equivalent to 'node.children[i] = child'. This method also sets the
309 child's parent attribute appropriately.
310 """
311 child.parent = self
312 self.children[i].parent = None
313 self.children[i] = child
314 self.changed()
315
316 def insert_child(self, i, child):
317 """
318 Equivalent to 'node.children.insert(i, child)'. This method also sets
319 the child's parent attribute appropriately.
320 """
321 child.parent = self
322 self.children.insert(i, child)
323 self.changed()
324
325 def append_child(self, child):
326 """
327 Equivalent to 'node.children.append(child)'. This method also sets the
328 child's parent attribute appropriately.
329 """
330 child.parent = self
331 self.children.append(child)
332 self.changed()
333
334
335class Leaf(Base):
336
337 """Concrete implementation for leaf nodes."""
338
339 # Default values for instance variables
340 _prefix = "" # Whitespace and comments preceding this token in the input
341 lineno = 0 # Line where this token starts in the input
342 column = 0 # Column where this token tarts in the input
343
344 def __init__(self, type, value, context=None, prefix=None):
345 """
346 Initializer.
347
348 Takes a type constant (a token number < 256), a string value, and an
349 optional context keyword argument.
350 """
351 assert 0 <= type < 256, type
352 if context is not None:
353 self._prefix, (self.lineno, self.column) = context
354 self.type = type
355 self.value = value
356 if prefix is not None:
357 self._prefix = prefix
358
359 def __repr__(self):
360 """Return a canonical string representation."""
361 return "%s(%r, %r)" % (self.__class__.__name__,
362 self.type,
363 self.value)
364
365 def __unicode__(self):
366 """
367 Return a pretty string representation.
368
369 This reproduces the input source exactly.
370 """
371 return self.prefix + unicode(self.value)
372
373 if sys.version_info > (3, 0):
374 __str__ = __unicode__
375
376 def _eq(self, other):
377 """Compare two nodes for equality."""
378 return (self.type, self.value) == (other.type, other.value)
379
380 def clone(self):
381 """Return a cloned (deep) copy of self."""
382 return Leaf(self.type, self.value,
383 (self.prefix, (self.lineno, self.column)))
384
385 def post_order(self):
386 """Return a post-order iterator for the tree."""
387 yield self
388
389 def pre_order(self):
390 """Return a pre-order iterator for the tree."""
391 yield self
392
Benjamin Peterson6d54b012010-05-07 19:06:32 +0000393 def _prefix_getter(self):
Benjamin Peterson18532e52010-02-06 02:40:03 +0000394 """
395 The whitespace and comments preceding this token in the input.
396 """
397 return self._prefix
398
Benjamin Peterson6d54b012010-05-07 19:06:32 +0000399 def _prefix_setter(self, prefix):
Benjamin Peterson18532e52010-02-06 02:40:03 +0000400 self.changed()
401 self._prefix = prefix
402
Benjamin Peterson6d54b012010-05-07 19:06:32 +0000403 prefix = property(_prefix_getter, _prefix_setter)
Benjamin Peterson18532e52010-02-06 02:40:03 +0000404
405def convert(gr, raw_node):
406 """
407 Convert raw node information to a Node or Leaf instance.
408
409 This is passed to the parser driver which calls it whenever a reduction of a
410 grammar rule produces a new complete node, so that the tree is build
411 strictly bottom-up.
412 """
413 type, value, context, children = raw_node
414 if children or type in gr.number2symbol:
415 # If there's exactly one child, return that child instead of
416 # creating a new node.
417 if len(children) == 1:
418 return children[0]
419 return Node(type, children, context=context)
420 else:
421 return Leaf(type, value, context=context)
422
423
424class BasePattern(object):
425
426 """
427 A pattern is a tree matching pattern.
428
429 It looks for a specific node type (token or symbol), and
430 optionally for a specific content.
431
432 This is an abstract base class. There are three concrete
433 subclasses:
434
435 - LeafPattern matches a single leaf node;
436 - NodePattern matches a single node (usually non-leaf);
437 - WildcardPattern matches a sequence of nodes of variable length.
438 """
439
440 # Defaults for instance variables
441 type = None # Node type (token if < 256, symbol if >= 256)
442 content = None # Optional content matching pattern
443 name = None # Optional name used to store match in results dict
444
445 def __new__(cls, *args, **kwds):
446 """Constructor that prevents BasePattern from being instantiated."""
447 assert cls is not BasePattern, "Cannot instantiate BasePattern"
448 return object.__new__(cls)
449
450 def __repr__(self):
451 args = [type_repr(self.type), self.content, self.name]
452 while args and args[-1] is None:
453 del args[-1]
454 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
455
456 def optimize(self):
457 """
458 A subclass can define this as a hook for optimizations.
459
460 Returns either self or another node with the same effect.
461 """
462 return self
463
464 def match(self, node, results=None):
465 """
466 Does this pattern exactly match a node?
467
468 Returns True if it matches, False if not.
469
470 If results is not None, it must be a dict which will be
471 updated with the nodes matching named subpatterns.
472
473 Default implementation for non-wildcard patterns.
474 """
475 if self.type is not None and node.type != self.type:
476 return False
477 if self.content is not None:
478 r = None
479 if results is not None:
480 r = {}
481 if not self._submatch(node, r):
482 return False
483 if r:
484 results.update(r)
485 if results is not None and self.name:
486 results[self.name] = node
487 return True
488
489 def match_seq(self, nodes, results=None):
490 """
491 Does this pattern exactly match a sequence of nodes?
492
493 Default implementation for non-wildcard patterns.
494 """
495 if len(nodes) != 1:
496 return False
497 return self.match(nodes[0], results)
498
499 def generate_matches(self, nodes):
500 """
501 Generator yielding all matches for this pattern.
502
503 Default implementation for non-wildcard patterns.
504 """
505 r = {}
506 if nodes and self.match(nodes[0], r):
507 yield 1, r
508
509
510class LeafPattern(BasePattern):
511
512 def __init__(self, type=None, content=None, name=None):
513 """
514 Initializer. Takes optional type, content, and name.
515
516 The type, if given must be a token type (< 256). If not given,
517 this matches any *leaf* node; the content may still be required.
518
519 The content, if given, must be a string.
520
521 If a name is given, the matching node is stored in the results
522 dict under that key.
523 """
524 if type is not None:
525 assert 0 <= type < 256, type
526 if content is not None:
527 assert isinstance(content, basestring), repr(content)
528 self.type = type
529 self.content = content
530 self.name = name
531
532 def match(self, node, results=None):
533 """Override match() to insist on a leaf node."""
534 if not isinstance(node, Leaf):
535 return False
536 return BasePattern.match(self, node, results)
537
538 def _submatch(self, node, results=None):
539 """
540 Match the pattern's content to the node's children.
541
542 This assumes the node type matches and self.content is not None.
543
544 Returns True if it matches, False if not.
545
546 If results is not None, it must be a dict which will be
547 updated with the nodes matching named subpatterns.
548
549 When returning False, the results dict may still be updated.
550 """
551 return self.content == node.value
552
553
554class NodePattern(BasePattern):
555
556 wildcards = False
557
558 def __init__(self, type=None, content=None, name=None):
559 """
560 Initializer. Takes optional type, content, and name.
561
562 The type, if given, must be a symbol type (>= 256). If the
563 type is None this matches *any* single node (leaf or not),
564 except if content is not None, in which it only matches
565 non-leaf nodes that also match the content pattern.
566
567 The content, if not None, must be a sequence of Patterns that
568 must match the node's children exactly. If the content is
569 given, the type must not be None.
570
571 If a name is given, the matching node is stored in the results
572 dict under that key.
573 """
574 if type is not None:
575 assert type >= 256, type
576 if content is not None:
577 assert not isinstance(content, basestring), repr(content)
578 content = list(content)
579 for i, item in enumerate(content):
580 assert isinstance(item, BasePattern), (i, item)
581 if isinstance(item, WildcardPattern):
582 self.wildcards = True
583 self.type = type
584 self.content = content
585 self.name = name
586
587 def _submatch(self, node, results=None):
588 """
589 Match the pattern's content to the node's children.
590
591 This assumes the node type matches and self.content is not None.
592
593 Returns True if it matches, False if not.
594
595 If results is not None, it must be a dict which will be
596 updated with the nodes matching named subpatterns.
597
598 When returning False, the results dict may still be updated.
599 """
600 if self.wildcards:
601 for c, r in generate_matches(self.content, node.children):
602 if c == len(node.children):
603 if results is not None:
604 results.update(r)
605 return True
606 return False
607 if len(self.content) != len(node.children):
608 return False
609 for subpattern, child in zip(self.content, node.children):
610 if not subpattern.match(child, results):
611 return False
612 return True
613
614
615class WildcardPattern(BasePattern):
616
617 """
618 A wildcard pattern can match zero or more nodes.
619
620 This has all the flexibility needed to implement patterns like:
621
622 .* .+ .? .{m,n}
623 (a b c | d e | f)
624 (...)* (...)+ (...)? (...){m,n}
625
626 except it always uses non-greedy matching.
627 """
628
629 def __init__(self, content=None, min=0, max=HUGE, name=None):
630 """
631 Initializer.
632
633 Args:
634 content: optional sequence of subsequences of patterns;
635 if absent, matches one node;
636 if present, each subsequence is an alternative [*]
637 min: optinal minumum number of times to match, default 0
638 max: optional maximum number of times tro match, default HUGE
639 name: optional name assigned to this match
640
641 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
642 equivalent to (a b c | d e | f g h); if content is None,
643 this is equivalent to '.' in regular expression terms.
644 The min and max parameters work as follows:
645 min=0, max=maxint: .*
646 min=1, max=maxint: .+
647 min=0, max=1: .?
648 min=1, max=1: .
649 If content is not None, replace the dot with the parenthesized
650 list of alternatives, e.g. (a b c | d e | f g h)*
651 """
652 assert 0 <= min <= max <= HUGE, (min, max)
653 if content is not None:
654 content = tuple(map(tuple, content)) # Protect against alterations
655 # Check sanity of alternatives
656 assert len(content), repr(content) # Can't have zero alternatives
657 for alt in content:
658 assert len(alt), repr(alt) # Can have empty alternatives
659 self.content = content
660 self.min = min
661 self.max = max
662 self.name = name
663
664 def optimize(self):
665 """Optimize certain stacked wildcard patterns."""
666 subpattern = None
667 if (self.content is not None and
668 len(self.content) == 1 and len(self.content[0]) == 1):
669 subpattern = self.content[0][0]
670 if self.min == 1 and self.max == 1:
671 if self.content is None:
672 return NodePattern(name=self.name)
673 if subpattern is not None and self.name == subpattern.name:
674 return subpattern.optimize()
675 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
676 subpattern.min <= 1 and self.name == subpattern.name):
677 return WildcardPattern(subpattern.content,
678 self.min*subpattern.min,
679 self.max*subpattern.max,
680 subpattern.name)
681 return self
682
683 def match(self, node, results=None):
684 """Does this pattern exactly match a node?"""
685 return self.match_seq([node], results)
686
687 def match_seq(self, nodes, results=None):
688 """Does this pattern exactly match a sequence of nodes?"""
689 for c, r in self.generate_matches(nodes):
690 if c == len(nodes):
691 if results is not None:
692 results.update(r)
693 if self.name:
694 results[self.name] = list(nodes)
695 return True
696 return False
697
698 def generate_matches(self, nodes):
699 """
700 Generator yielding matches for a sequence of nodes.
701
702 Args:
703 nodes: sequence of nodes
704
705 Yields:
706 (count, results) tuples where:
707 count: the match comprises nodes[:count];
708 results: dict containing named submatches.
709 """
710 if self.content is None:
711 # Shortcut for special case (see __init__.__doc__)
712 for count in xrange(self.min, 1 + min(len(nodes), self.max)):
713 r = {}
714 if self.name:
715 r[self.name] = nodes[:count]
716 yield count, r
717 elif self.name == "bare_name":
718 yield self._bare_name_matches(nodes)
719 else:
720 # The reason for this is that hitting the recursion limit usually
721 # results in some ugly messages about how RuntimeErrors are being
722 # ignored.
723 save_stderr = sys.stderr
724 sys.stderr = StringIO()
725 try:
726 for count, r in self._recursive_matches(nodes, 0):
727 if self.name:
728 r[self.name] = nodes[:count]
729 yield count, r
730 except RuntimeError:
731 # We fall back to the iterative pattern matching scheme if the recursive
732 # scheme hits the recursion limit.
733 for count, r in self._iterative_matches(nodes):
734 if self.name:
735 r[self.name] = nodes[:count]
736 yield count, r
737 finally:
738 sys.stderr = save_stderr
739
740 def _iterative_matches(self, nodes):
741 """Helper to iteratively yield the matches."""
742 nodelen = len(nodes)
743 if 0 >= self.min:
744 yield 0, {}
745
746 results = []
747 # generate matches that use just one alt from self.content
748 for alt in self.content:
749 for c, r in generate_matches(alt, nodes):
750 yield c, r
751 results.append((c, r))
752
753 # for each match, iterate down the nodes
754 while results:
755 new_results = []
756 for c0, r0 in results:
757 # stop if the entire set of nodes has been matched
758 if c0 < nodelen and c0 <= self.max:
759 for alt in self.content:
760 for c1, r1 in generate_matches(alt, nodes[c0:]):
761 if c1 > 0:
762 r = {}
763 r.update(r0)
764 r.update(r1)
765 yield c0 + c1, r
766 new_results.append((c0 + c1, r))
767 results = new_results
768
769 def _bare_name_matches(self, nodes):
770 """Special optimized matcher for bare_name."""
771 count = 0
772 r = {}
773 done = False
774 max = len(nodes)
775 while not done and count < max:
776 done = True
777 for leaf in self.content:
778 if leaf[0].match(nodes[count], r):
779 count += 1
780 done = False
781 break
782 r[self.name] = nodes[:count]
783 return count, r
784
785 def _recursive_matches(self, nodes, count):
786 """Helper to recursively yield the matches."""
787 assert self.content is not None
788 if count >= self.min:
789 yield 0, {}
790 if count < self.max:
791 for alt in self.content:
792 for c0, r0 in generate_matches(alt, nodes):
793 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
794 r = {}
795 r.update(r0)
796 r.update(r1)
797 yield c0 + c1, r
798
799
800class NegatedPattern(BasePattern):
801
802 def __init__(self, content=None):
803 """
804 Initializer.
805
806 The argument is either a pattern or None. If it is None, this
807 only matches an empty sequence (effectively '$' in regex
808 lingo). If it is not None, this matches whenever the argument
809 pattern doesn't have any matches.
810 """
811 if content is not None:
812 assert isinstance(content, BasePattern), repr(content)
813 self.content = content
814
815 def match(self, node):
816 # We never match a node in its entirety
817 return False
818
819 def match_seq(self, nodes):
820 # We only match an empty sequence of nodes in its entirety
821 return len(nodes) == 0
822
823 def generate_matches(self, nodes):
824 if self.content is None:
825 # Return a match if there is an empty sequence
826 if len(nodes) == 0:
827 yield 0, {}
828 else:
829 # Return a match if the argument pattern has no matches
830 for c, r in self.content.generate_matches(nodes):
831 return
832 yield 0, {}
833
834
835def generate_matches(patterns, nodes):
836 """
837 Generator yielding matches for a sequence of patterns and nodes.
838
839 Args:
840 patterns: a sequence of patterns
841 nodes: a sequence of nodes
842
843 Yields:
844 (count, results) tuples where:
845 count: the entire sequence of patterns matches nodes[:count];
846 results: dict containing named submatches.
847 """
848 if not patterns:
849 yield 0, {}
850 else:
851 p, rest = patterns[0], patterns[1:]
852 for c0, r0 in p.generate_matches(nodes):
853 if not rest:
854 yield c0, r0
855 else:
856 for c1, r1 in generate_matches(rest, nodes[c0:]):
857 r = {}
858 r.update(r0)
859 r.update(r1)
860 yield c0 + c1, r