blob: 08198fd84f74369bad40599ae753434938008558 [file] [log] [blame]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Python parse tree definitions.
5
6This is a very concrete parse tree; we need to keep every token and
7even the comments and whitespace between tokens.
8
9There's also a pattern matching implementation here.
10"""
11
12__author__ = "Guido van Rossum <guido@python.org>"
13
14
15HUGE = 0x7FFFFFFF # maximum repeat count, default max
16
Martin v. Löwisbaf267c2008-03-22 00:01:12 +000017_type_reprs = {}
18def type_repr(type_num):
19 global _type_reprs
20 if not _type_reprs:
21 from .pygram import python_symbols
22 # printing tokens is possible but not as useful
23 # from .pgen2 import token // token.__dict__.items():
24 for name, val in python_symbols.__dict__.items():
25 if type(val) == int: _type_reprs[val] = name
26 return _type_reprs.setdefault(type_num, type_num)
27
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000028
29class Base(object):
30
31 """Abstract base class for Node and Leaf.
32
33 This provides some default functionality and boilerplate using the
34 template pattern.
35
36 A node may be a subnode of at most one parent.
37 """
38
39 # Default values for instance variables
40 type = None # int: token number (< 256) or symbol number (>= 256)
41 parent = None # Parent node pointer, or None
42 children = () # Tuple of subnodes
43 was_changed = False
44
45 def __new__(cls, *args, **kwds):
46 """Constructor that prevents Base from being instantiated."""
47 assert cls is not Base, "Cannot instantiate Base"
48 return object.__new__(cls)
49
50 def __eq__(self, other):
51 """Compares two nodes for equality.
52
53 This calls the method _eq().
54 """
55 if self.__class__ is not other.__class__:
56 return NotImplemented
57 return self._eq(other)
58
59 def __ne__(self, other):
60 """Compares two nodes for inequality.
61
62 This calls the method _eq().
63 """
64 if self.__class__ is not other.__class__:
65 return NotImplemented
66 return not self._eq(other)
67
68 def _eq(self, other):
69 """Compares two nodes for equality.
70
71 This is called by __eq__ and __ne__. It is only called if the
72 two nodes have the same type. This must be implemented by the
73 concrete subclass. Nodes should be considered equal if they
74 have the same structure, ignoring the prefix string and other
75 context information.
76 """
77 raise NotImplementedError
78
79 def clone(self):
80 """Returns a cloned (deep) copy of self.
81
82 This must be implemented by the concrete subclass.
83 """
84 raise NotImplementedError
85
86 def post_order(self):
87 """Returns a post-order iterator for the tree.
88
89 This must be implemented by the concrete subclass.
90 """
91 raise NotImplementedError
92
93 def pre_order(self):
94 """Returns a pre-order iterator for the tree.
95
96 This must be implemented by the concrete subclass.
97 """
98 raise NotImplementedError
99
100 def set_prefix(self, prefix):
101 """Sets the prefix for the node (see Leaf class).
102
103 This must be implemented by the concrete subclass.
104 """
105 raise NotImplementedError
106
107 def get_prefix(self):
108 """Returns the prefix for the node (see Leaf class).
109
110 This must be implemented by the concrete subclass.
111 """
112 raise NotImplementedError
113
114 def replace(self, new):
115 """Replaces this node with a new one in the parent."""
116 assert self.parent is not None, str(self)
117 assert new is not None
118 if not isinstance(new, list):
119 new = [new]
120 l_children = []
121 found = False
122 for ch in self.parent.children:
123 if ch is self:
124 assert not found, (self.parent.children, self, new)
125 if new is not None:
126 l_children.extend(new)
127 found = True
128 else:
129 l_children.append(ch)
130 assert found, (self.children, self, new)
131 self.parent.changed()
132 self.parent.children = l_children
133 for x in new:
134 x.parent = self.parent
135 self.parent = None
136
137 def get_lineno(self):
138 """Returns the line number which generated the invocant node."""
139 node = self
140 while not isinstance(node, Leaf):
141 if not node.children:
142 return
143 node = node.children[0]
144 return node.lineno
145
146 def changed(self):
147 if self.parent:
148 self.parent.changed()
149 self.was_changed = True
150
151 def remove(self):
152 """Remove the node from the tree. Returns the position of the node
153 in its parent's children before it was removed."""
154 if self.parent:
155 for i, node in enumerate(self.parent.children):
156 if node is self:
157 self.parent.changed()
158 del self.parent.children[i]
159 self.parent = None
160 return i
161
162 def get_next_sibling(self):
163 """Return the node immediately following the invocant in their
164 parent's children list. If the invocant does not have a next
165 sibling, return None."""
166 if self.parent is None:
167 return None
168
169 # Can't use index(); we need to test by identity
Martin v. Löwis60a819d2008-04-10 02:48:01 +0000170 for i, child in enumerate(self.parent.children):
171 if child is self:
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000172 try:
173 return self.parent.children[i+1]
174 except IndexError:
175 return None
176
Martin v. Löwis60a819d2008-04-10 02:48:01 +0000177 def get_prev_sibling(self):
178 """Return the node immediately preceding the invocant in their
179 parent's children list. If the invocant does not have a previous
180 sibling, return None."""
181 if self.parent is None:
182 return None
183
184 # Can't use index(); we need to test by identity
185 for i, child in enumerate(self.parent.children):
186 if child is self:
187 if i == 0:
188 return None
189 return self.parent.children[i-1]
190
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000191 def get_suffix(self):
192 """Return the string immediately following the invocant node. This
193 is effectively equivalent to node.get_next_sibling().get_prefix()"""
194 next_sib = self.get_next_sibling()
195 if next_sib is None:
196 return ""
197 return next_sib.get_prefix()
198
199
200class Node(Base):
201
202 """Concrete implementation for interior nodes."""
203
204 def __init__(self, type, children, context=None, prefix=None):
205 """Initializer.
206
207 Takes a type constant (a symbol number >= 256), a sequence of
208 child nodes, and an optional context keyword argument.
209
210 As a side effect, the parent pointers of the children are updated.
211 """
212 assert type >= 256, type
213 self.type = type
214 self.children = list(children)
215 for ch in self.children:
216 assert ch.parent is None, repr(ch)
217 ch.parent = self
218 if prefix is not None:
219 self.set_prefix(prefix)
220
221 def __repr__(self):
222 """Returns a canonical string representation."""
Martin v. Löwisbaf267c2008-03-22 00:01:12 +0000223 return "%s(%s, %r)" % (self.__class__.__name__,
224 type_repr(self.type),
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000225 self.children)
226
227 def __str__(self):
228 """Returns a pretty string representation.
229
230 This reproduces the input source exactly.
231 """
232 return "".join(map(str, self.children))
233
234 def _eq(self, other):
235 """Compares two nodes for equality."""
236 return (self.type, self.children) == (other.type, other.children)
237
238 def clone(self):
239 """Returns a cloned (deep) copy of self."""
240 return Node(self.type, [ch.clone() for ch in self.children])
241
242 def post_order(self):
243 """Returns a post-order iterator for the tree."""
244 for child in self.children:
245 for node in child.post_order():
246 yield node
247 yield self
248
249 def pre_order(self):
250 """Returns a pre-order iterator for the tree."""
251 yield self
252 for child in self.children:
253 for node in child.post_order():
254 yield node
255
256 def set_prefix(self, prefix):
257 """Sets the prefix for the node.
258
259 This passes the responsibility on to the first child.
260 """
261 if self.children:
262 self.children[0].set_prefix(prefix)
263
264 def get_prefix(self):
265 """Returns the prefix for the node.
266
267 This passes the call on to the first child.
268 """
269 if not self.children:
270 return ""
271 return self.children[0].get_prefix()
272
273 def set_child(self, i, child):
274 """Equivalent to 'node.children[i] = child'. This method also sets the
275 child's parent attribute appropriately."""
276 child.parent = self
277 self.children[i].parent = None
278 self.children[i] = child
279
280 def insert_child(self, i, child):
281 """Equivalent to 'node.children.insert(i, child)'. This method also
282 sets the child's parent attribute appropriately."""
283 child.parent = self
284 self.children.insert(i, child)
285
286 def append_child(self, child):
287 """Equivalent to 'node.children.append(child)'. This method also
288 sets the child's parent attribute appropriately."""
289 child.parent = self
290 self.children.append(child)
291
292
293class Leaf(Base):
294
295 """Concrete implementation for leaf nodes."""
296
297 # Default values for instance variables
298 prefix = "" # Whitespace and comments preceding this token in the input
299 lineno = 0 # Line where this token starts in the input
300 column = 0 # Column where this token tarts in the input
301
302 def __init__(self, type, value, context=None, prefix=None):
303 """Initializer.
304
305 Takes a type constant (a token number < 256), a string value,
306 and an optional context keyword argument.
307 """
308 assert 0 <= type < 256, type
309 if context is not None:
310 self.prefix, (self.lineno, self.column) = context
311 self.type = type
312 self.value = value
313 if prefix is not None:
314 self.prefix = prefix
315
316 def __repr__(self):
317 """Returns a canonical string representation."""
318 return "%s(%r, %r)" % (self.__class__.__name__,
319 self.type,
320 self.value)
321
322 def __str__(self):
323 """Returns a pretty string representation.
324
325 This reproduces the input source exactly.
326 """
327 return self.prefix + str(self.value)
328
329 def _eq(self, other):
330 """Compares two nodes for equality."""
331 return (self.type, self.value) == (other.type, other.value)
332
333 def clone(self):
334 """Returns a cloned (deep) copy of self."""
335 return Leaf(self.type, self.value,
336 (self.prefix, (self.lineno, self.column)))
337
338 def post_order(self):
339 """Returns a post-order iterator for the tree."""
340 yield self
341
342 def pre_order(self):
343 """Returns a pre-order iterator for the tree."""
344 yield self
345
346 def set_prefix(self, prefix):
347 """Sets the prefix for the node."""
348 self.changed()
349 self.prefix = prefix
350
351 def get_prefix(self):
352 """Returns the prefix for the node."""
353 return self.prefix
354
355
356def convert(gr, raw_node):
357 """Converts raw node information to a Node or Leaf instance.
358
359 This is passed to the parser driver which calls it whenever a
360 reduction of a grammar rule produces a new complete node, so that
361 the tree is build strictly bottom-up.
362 """
363 type, value, context, children = raw_node
364 if children or type in gr.number2symbol:
365 # If there's exactly one child, return that child instead of
366 # creating a new node.
367 if len(children) == 1:
368 return children[0]
369 return Node(type, children, context=context)
370 else:
371 return Leaf(type, value, context=context)
372
373
374class BasePattern(object):
375
376 """A pattern is a tree matching pattern.
377
378 It looks for a specific node type (token or symbol), and
379 optionally for a specific content.
380
381 This is an abstract base class. There are three concrete
382 subclasses:
383
384 - LeafPattern matches a single leaf node;
385 - NodePattern matches a single node (usually non-leaf);
386 - WildcardPattern matches a sequence of nodes of variable length.
387 """
388
389 # Defaults for instance variables
390 type = None # Node type (token if < 256, symbol if >= 256)
391 content = None # Optional content matching pattern
392 name = None # Optional name used to store match in results dict
393
394 def __new__(cls, *args, **kwds):
395 """Constructor that prevents BasePattern from being instantiated."""
396 assert cls is not BasePattern, "Cannot instantiate BasePattern"
397 return object.__new__(cls)
398
399 def __repr__(self):
Martin v. Löwisbaf267c2008-03-22 00:01:12 +0000400 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000401 while args and args[-1] is None:
402 del args[-1]
403 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
404
405 def optimize(self):
406 """A subclass can define this as a hook for optimizations.
407
408 Returns either self or another node with the same effect.
409 """
410 return self
411
412 def match(self, node, results=None):
413 """Does this pattern exactly match a node?
414
415 Returns True if it matches, False if not.
416
417 If results is not None, it must be a dict which will be
418 updated with the nodes matching named subpatterns.
419
420 Default implementation for non-wildcard patterns.
421 """
422 if self.type is not None and node.type != self.type:
423 return False
424 if self.content is not None:
425 r = None
426 if results is not None:
427 r = {}
428 if not self._submatch(node, r):
429 return False
430 if r:
431 results.update(r)
432 if results is not None and self.name:
433 results[self.name] = node
434 return True
435
436 def match_seq(self, nodes, results=None):
437 """Does this pattern exactly match a sequence of nodes?
438
439 Default implementation for non-wildcard patterns.
440 """
441 if len(nodes) != 1:
442 return False
443 return self.match(nodes[0], results)
444
445 def generate_matches(self, nodes):
446 """Generator yielding all matches for this pattern.
447
448 Default implementation for non-wildcard patterns.
449 """
450 r = {}
451 if nodes and self.match(nodes[0], r):
452 yield 1, r
453
454
455class LeafPattern(BasePattern):
456
457 def __init__(self, type=None, content=None, name=None):
458 """Initializer. Takes optional type, content, and name.
459
460 The type, if given must be a token type (< 256). If not given,
461 this matches any *leaf* node; the content may still be required.
462
463 The content, if given, must be a string.
464
465 If a name is given, the matching node is stored in the results
466 dict under that key.
467 """
468 if type is not None:
469 assert 0 <= type < 256, type
470 if content is not None:
471 assert isinstance(content, basestring), repr(content)
472 self.type = type
473 self.content = content
474 self.name = name
475
476 def match(self, node, results=None):
477 """Override match() to insist on a leaf node."""
478 if not isinstance(node, Leaf):
479 return False
480 return BasePattern.match(self, node, results)
481
482 def _submatch(self, node, results=None):
483 """Match the pattern's content to the node's children.
484
485 This assumes the node type matches and self.content is not None.
486
487 Returns True if it matches, False if not.
488
489 If results is not None, it must be a dict which will be
490 updated with the nodes matching named subpatterns.
491
492 When returning False, the results dict may still be updated.
493 """
494 return self.content == node.value
495
496
497class NodePattern(BasePattern):
498
499 wildcards = False
500
501 def __init__(self, type=None, content=None, name=None):
502 """Initializer. Takes optional type, content, and name.
503
504 The type, if given, must be a symbol type (>= 256). If the
505 type is None this matches *any* single node (leaf or not),
506 except if content is not None, in which it only matches
507 non-leaf nodes that also match the content pattern.
508
509 The content, if not None, must be a sequence of Patterns that
510 must match the node's children exactly. If the content is
511 given, the type must not be None.
512
513 If a name is given, the matching node is stored in the results
514 dict under that key.
515 """
516 if type is not None:
517 assert type >= 256, type
518 if content is not None:
519 assert not isinstance(content, basestring), repr(content)
520 content = list(content)
521 for i, item in enumerate(content):
522 assert isinstance(item, BasePattern), (i, item)
523 if isinstance(item, WildcardPattern):
524 self.wildcards = True
525 self.type = type
526 self.content = content
527 self.name = name
528
529 def _submatch(self, node, results=None):
530 """Match the pattern's content to the node's children.
531
532 This assumes the node type matches and self.content is not None.
533
534 Returns True if it matches, False if not.
535
536 If results is not None, it must be a dict which will be
537 updated with the nodes matching named subpatterns.
538
539 When returning False, the results dict may still be updated.
540 """
541 if self.wildcards:
542 for c, r in generate_matches(self.content, node.children):
543 if c == len(node.children):
544 if results is not None:
545 results.update(r)
546 return True
547 return False
548 if len(self.content) != len(node.children):
549 return False
550 for subpattern, child in zip(self.content, node.children):
551 if not subpattern.match(child, results):
552 return False
553 return True
554
555
556class WildcardPattern(BasePattern):
557
558 """A wildcard pattern can match zero or more nodes.
559
560 This has all the flexibility needed to implement patterns like:
561
562 .* .+ .? .{m,n}
563 (a b c | d e | f)
564 (...)* (...)+ (...)? (...){m,n}
565
566 except it always uses non-greedy matching.
567 """
568
569 def __init__(self, content=None, min=0, max=HUGE, name=None):
570 """Initializer.
571
572 Args:
573 content: optional sequence of subsequences of patterns;
574 if absent, matches one node;
575 if present, each subsequence is an alternative [*]
576 min: optinal minumum number of times to match, default 0
577 max: optional maximum number of times tro match, default HUGE
578 name: optional name assigned to this match
579
580 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
581 equivalent to (a b c | d e | f g h); if content is None,
582 this is equivalent to '.' in regular expression terms.
583 The min and max parameters work as follows:
584 min=0, max=maxint: .*
585 min=1, max=maxint: .+
586 min=0, max=1: .?
587 min=1, max=1: .
588 If content is not None, replace the dot with the parenthesized
589 list of alternatives, e.g. (a b c | d e | f g h)*
590 """
591 assert 0 <= min <= max <= HUGE, (min, max)
592 if content is not None:
593 content = tuple(map(tuple, content)) # Protect against alterations
594 # Check sanity of alternatives
595 assert len(content), repr(content) # Can't have zero alternatives
596 for alt in content:
597 assert len(alt), repr(alt) # Can have empty alternatives
598 self.content = content
599 self.min = min
600 self.max = max
601 self.name = name
602
603 def optimize(self):
604 """Optimize certain stacked wildcard patterns."""
605 subpattern = None
606 if (self.content is not None and
607 len(self.content) == 1 and len(self.content[0]) == 1):
608 subpattern = self.content[0][0]
609 if self.min == 1 and self.max == 1:
610 if self.content is None:
611 return NodePattern(name=self.name)
612 if subpattern is not None and self.name == subpattern.name:
613 return subpattern.optimize()
614 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
615 subpattern.min <= 1 and self.name == subpattern.name):
616 return WildcardPattern(subpattern.content,
617 self.min*subpattern.min,
618 self.max*subpattern.max,
619 subpattern.name)
620 return self
621
622 def match(self, node, results=None):
623 """Does this pattern exactly match a node?"""
624 return self.match_seq([node], results)
625
626 def match_seq(self, nodes, results=None):
627 """Does this pattern exactly match a sequence of nodes?"""
628 for c, r in self.generate_matches(nodes):
629 if c == len(nodes):
630 if results is not None:
631 results.update(r)
632 if self.name:
633 results[self.name] = list(nodes)
634 return True
635 return False
636
637 def generate_matches(self, nodes):
638 """Generator yielding matches for a sequence of nodes.
639
640 Args:
641 nodes: sequence of nodes
642
643 Yields:
644 (count, results) tuples where:
645 count: the match comprises nodes[:count];
646 results: dict containing named submatches.
647 """
648 if self.content is None:
649 # Shortcut for special case (see __init__.__doc__)
650 for count in xrange(self.min, 1 + min(len(nodes), self.max)):
651 r = {}
652 if self.name:
653 r[self.name] = nodes[:count]
654 yield count, r
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000655 elif self.name == "bare_name":
656 yield self._bare_name_matches(nodes)
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000657 else:
Benjamin Peterson4d164152008-10-04 20:55:50 +0000658 try:
659 for count, r in self._recursive_matches(nodes, 0):
660 if self.name:
661 r[self.name] = nodes[:count]
662 yield count, r
663 except RuntimeError:
664 # We fall back to the iterative pattern matching scheme if the recursive
665 # scheme hits the recursion limit.
666 for count, r in self._iterative_matches(nodes):
667 if self.name:
668 r[self.name] = nodes[:count]
669 yield count, r
670
671 def _iterative_matches(self, nodes):
672 """Helper to iteratively yield the matches."""
673 nodelen = len(nodes)
674 if 0 >= self.min:
675 yield 0, {}
676
677 results = []
678 # generate matches that use just one alt from self.content
679 for alt in self.content:
680 for c, r in generate_matches(alt, nodes):
681 yield c, r
682 results.append((c, r))
683
684 # for each match, iterate down the nodes
685 while results:
686 new_results = []
687 for c0, r0 in results:
688 # stop if the entire set of nodes has been matched
689 if c0 < nodelen and c0 <= self.max:
690 for alt in self.content:
691 for c1, r1 in generate_matches(alt, nodes[c0:]):
692 if c1 > 0:
693 r = {}
694 r.update(r0)
695 r.update(r1)
696 yield c0 + c1, r
697 new_results.append((c0 + c1, r))
698 results = new_results
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000699
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000700 def _bare_name_matches(self, nodes):
701 """Special optimized matcher for bare_name."""
702 count = 0
703 r = {}
704 done = False
705 max = len(nodes)
706 while not done and count < max:
707 done = True
708 for leaf in self.content:
709 if leaf[0].match(nodes[count], r):
710 count += 1
711 done = False
712 break
713 r[self.name] = nodes[:count]
714 return count, r
715
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000716 def _recursive_matches(self, nodes, count):
717 """Helper to recursively yield the matches."""
718 assert self.content is not None
719 if count >= self.min:
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000720 yield 0, {}
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000721 if count < self.max:
722 for alt in self.content:
723 for c0, r0 in generate_matches(alt, nodes):
724 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
725 r = {}
726 r.update(r0)
727 r.update(r1)
728 yield c0 + c1, r
729
730
731class NegatedPattern(BasePattern):
732
733 def __init__(self, content=None):
734 """Initializer.
735
736 The argument is either a pattern or None. If it is None, this
737 only matches an empty sequence (effectively '$' in regex
738 lingo). If it is not None, this matches whenever the argument
739 pattern doesn't have any matches.
740 """
741 if content is not None:
742 assert isinstance(content, BasePattern), repr(content)
743 self.content = content
744
745 def match(self, node):
746 # We never match a node in its entirety
747 return False
748
749 def match_seq(self, nodes):
750 # We only match an empty sequence of nodes in its entirety
751 return len(nodes) == 0
752
753 def generate_matches(self, nodes):
754 if self.content is None:
755 # Return a match if there is an empty sequence
756 if len(nodes) == 0:
757 yield 0, {}
758 else:
759 # Return a match if the argument pattern has no matches
760 for c, r in self.content.generate_matches(nodes):
761 return
762 yield 0, {}
763
764
765def generate_matches(patterns, nodes):
766 """Generator yielding matches for a sequence of patterns and nodes.
767
768 Args:
769 patterns: a sequence of patterns
770 nodes: a sequence of nodes
771
772 Yields:
773 (count, results) tuples where:
774 count: the entire sequence of patterns matches nodes[:count];
775 results: dict containing named submatches.
776 """
777 if not patterns:
778 yield 0, {}
779 else:
780 p, rest = patterns[0], patterns[1:]
781 for c0, r0 in p.generate_matches(nodes):
782 if not rest:
783 yield c0, r0
784 else:
785 for c1, r1 in generate_matches(rest, nodes[c0:]):
786 r = {}
787 r.update(r0)
788 r.update(r1)
789 yield c0 + c1, r