blob: efdeb053f6109c3bbc970a3bd385b908d77f2319 [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Python parse tree definitions.
5
6This is a very concrete parse tree; we need to keep every token and
7even the comments and whitespace between tokens.
8
9There's also a pattern matching implementation here.
10"""
11
12__author__ = "Guido van Rossum <guido@python.org>"
13
14
15HUGE = 0x7FFFFFFF # maximum repeat count, default max
16
Martin v. Löwis3faa84f2008-03-22 00:07:09 +000017_type_reprs = {}
18def type_repr(type_num):
19 global _type_reprs
20 if not _type_reprs:
21 from .pygram import python_symbols
22 # printing tokens is possible but not as useful
23 # from .pgen2 import token // token.__dict__.items():
24 for name, val in python_symbols.__dict__.items():
25 if type(val) == int: _type_reprs[val] = name
26 return _type_reprs.setdefault(type_num, type_num)
27
Martin v. Löwisef04c442008-03-19 05:04:44 +000028
29class Base(object):
30
31 """Abstract base class for Node and Leaf.
32
33 This provides some default functionality and boilerplate using the
34 template pattern.
35
36 A node may be a subnode of at most one parent.
37 """
38
39 # Default values for instance variables
40 type = None # int: token number (< 256) or symbol number (>= 256)
41 parent = None # Parent node pointer, or None
42 children = () # Tuple of subnodes
43 was_changed = False
44
45 def __new__(cls, *args, **kwds):
46 """Constructor that prevents Base from being instantiated."""
47 assert cls is not Base, "Cannot instantiate Base"
48 return object.__new__(cls)
49
50 def __eq__(self, other):
51 """Compares two nodes for equality.
52
53 This calls the method _eq().
54 """
55 if self.__class__ is not other.__class__:
56 return NotImplemented
57 return self._eq(other)
58
59 def __ne__(self, other):
60 """Compares two nodes for inequality.
61
62 This calls the method _eq().
63 """
64 if self.__class__ is not other.__class__:
65 return NotImplemented
66 return not self._eq(other)
67
68 def _eq(self, other):
69 """Compares two nodes for equality.
70
71 This is called by __eq__ and __ne__. It is only called if the
72 two nodes have the same type. This must be implemented by the
73 concrete subclass. Nodes should be considered equal if they
74 have the same structure, ignoring the prefix string and other
75 context information.
76 """
77 raise NotImplementedError
78
79 def clone(self):
80 """Returns a cloned (deep) copy of self.
81
82 This must be implemented by the concrete subclass.
83 """
84 raise NotImplementedError
85
86 def post_order(self):
87 """Returns a post-order iterator for the tree.
88
89 This must be implemented by the concrete subclass.
90 """
91 raise NotImplementedError
92
93 def pre_order(self):
94 """Returns a pre-order iterator for the tree.
95
96 This must be implemented by the concrete subclass.
97 """
98 raise NotImplementedError
99
100 def set_prefix(self, prefix):
101 """Sets the prefix for the node (see Leaf class).
102
103 This must be implemented by the concrete subclass.
104 """
105 raise NotImplementedError
106
107 def get_prefix(self):
108 """Returns the prefix for the node (see Leaf class).
109
110 This must be implemented by the concrete subclass.
111 """
112 raise NotImplementedError
113
114 def replace(self, new):
115 """Replaces this node with a new one in the parent."""
116 assert self.parent is not None, str(self)
117 assert new is not None
118 if not isinstance(new, list):
119 new = [new]
120 l_children = []
121 found = False
122 for ch in self.parent.children:
123 if ch is self:
124 assert not found, (self.parent.children, self, new)
125 if new is not None:
126 l_children.extend(new)
127 found = True
128 else:
129 l_children.append(ch)
130 assert found, (self.children, self, new)
131 self.parent.changed()
132 self.parent.children = l_children
133 for x in new:
134 x.parent = self.parent
135 self.parent = None
136
137 def get_lineno(self):
138 """Returns the line number which generated the invocant node."""
139 node = self
140 while not isinstance(node, Leaf):
141 if not node.children:
142 return
143 node = node.children[0]
144 return node.lineno
145
146 def changed(self):
147 if self.parent:
148 self.parent.changed()
149 self.was_changed = True
150
151 def remove(self):
152 """Remove the node from the tree. Returns the position of the node
153 in its parent's children before it was removed."""
154 if self.parent:
155 for i, node in enumerate(self.parent.children):
156 if node is self:
157 self.parent.changed()
158 del self.parent.children[i]
159 self.parent = None
160 return i
161
162 def get_next_sibling(self):
163 """Return the node immediately following the invocant in their
164 parent's children list. If the invocant does not have a next
165 sibling, return None."""
166 if self.parent is None:
167 return None
168
169 # Can't use index(); we need to test by identity
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000170 for i, child in enumerate(self.parent.children):
171 if child is self:
Martin v. Löwisef04c442008-03-19 05:04:44 +0000172 try:
173 return self.parent.children[i+1]
174 except IndexError:
175 return None
176
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000177 def get_prev_sibling(self):
178 """Return the node immediately preceding the invocant in their
179 parent's children list. If the invocant does not have a previous
180 sibling, return None."""
181 if self.parent is None:
182 return None
183
184 # Can't use index(); we need to test by identity
185 for i, child in enumerate(self.parent.children):
186 if child is self:
187 if i == 0:
188 return None
189 return self.parent.children[i-1]
190
Martin v. Löwisef04c442008-03-19 05:04:44 +0000191 def get_suffix(self):
192 """Return the string immediately following the invocant node. This
193 is effectively equivalent to node.get_next_sibling().get_prefix()"""
194 next_sib = self.get_next_sibling()
195 if next_sib is None:
196 return ""
197 return next_sib.get_prefix()
198
199
200class Node(Base):
201
202 """Concrete implementation for interior nodes."""
203
204 def __init__(self, type, children, context=None, prefix=None):
205 """Initializer.
206
207 Takes a type constant (a symbol number >= 256), a sequence of
208 child nodes, and an optional context keyword argument.
209
210 As a side effect, the parent pointers of the children are updated.
211 """
212 assert type >= 256, type
213 self.type = type
214 self.children = list(children)
215 for ch in self.children:
216 assert ch.parent is None, repr(ch)
217 ch.parent = self
218 if prefix is not None:
219 self.set_prefix(prefix)
220
221 def __repr__(self):
222 """Returns a canonical string representation."""
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000223 return "%s(%s, %r)" % (self.__class__.__name__,
224 type_repr(self.type),
Martin v. Löwisef04c442008-03-19 05:04:44 +0000225 self.children)
226
227 def __str__(self):
228 """Returns a pretty string representation.
229
230 This reproduces the input source exactly.
231 """
232 return "".join(map(str, self.children))
233
234 def _eq(self, other):
235 """Compares two nodes for equality."""
236 return (self.type, self.children) == (other.type, other.children)
237
238 def clone(self):
239 """Returns a cloned (deep) copy of self."""
240 return Node(self.type, [ch.clone() for ch in self.children])
241
242 def post_order(self):
243 """Returns a post-order iterator for the tree."""
244 for child in self.children:
245 for node in child.post_order():
246 yield node
247 yield self
248
249 def pre_order(self):
250 """Returns a pre-order iterator for the tree."""
251 yield self
252 for child in self.children:
253 for node in child.post_order():
254 yield node
255
256 def set_prefix(self, prefix):
257 """Sets the prefix for the node.
258
259 This passes the responsibility on to the first child.
260 """
261 if self.children:
262 self.children[0].set_prefix(prefix)
263
264 def get_prefix(self):
265 """Returns the prefix for the node.
266
267 This passes the call on to the first child.
268 """
269 if not self.children:
270 return ""
271 return self.children[0].get_prefix()
272
273 def set_child(self, i, child):
274 """Equivalent to 'node.children[i] = child'. This method also sets the
275 child's parent attribute appropriately."""
276 child.parent = self
277 self.children[i].parent = None
278 self.children[i] = child
279
280 def insert_child(self, i, child):
281 """Equivalent to 'node.children.insert(i, child)'. This method also
282 sets the child's parent attribute appropriately."""
283 child.parent = self
284 self.children.insert(i, child)
285
286 def append_child(self, child):
287 """Equivalent to 'node.children.append(child)'. This method also
288 sets the child's parent attribute appropriately."""
289 child.parent = self
290 self.children.append(child)
291
292
293class Leaf(Base):
294
295 """Concrete implementation for leaf nodes."""
296
297 # Default values for instance variables
298 prefix = "" # Whitespace and comments preceding this token in the input
299 lineno = 0 # Line where this token starts in the input
300 column = 0 # Column where this token tarts in the input
301
302 def __init__(self, type, value, context=None, prefix=None):
303 """Initializer.
304
305 Takes a type constant (a token number < 256), a string value,
306 and an optional context keyword argument.
307 """
308 assert 0 <= type < 256, type
309 if context is not None:
310 self.prefix, (self.lineno, self.column) = context
311 self.type = type
312 self.value = value
313 if prefix is not None:
314 self.prefix = prefix
315
316 def __repr__(self):
317 """Returns a canonical string representation."""
318 return "%s(%r, %r)" % (self.__class__.__name__,
319 self.type,
320 self.value)
321
322 def __str__(self):
323 """Returns a pretty string representation.
324
325 This reproduces the input source exactly.
326 """
327 return self.prefix + str(self.value)
328
329 def _eq(self, other):
330 """Compares two nodes for equality."""
331 return (self.type, self.value) == (other.type, other.value)
332
333 def clone(self):
334 """Returns a cloned (deep) copy of self."""
335 return Leaf(self.type, self.value,
336 (self.prefix, (self.lineno, self.column)))
337
338 def post_order(self):
339 """Returns a post-order iterator for the tree."""
340 yield self
341
342 def pre_order(self):
343 """Returns a pre-order iterator for the tree."""
344 yield self
345
346 def set_prefix(self, prefix):
347 """Sets the prefix for the node."""
348 self.changed()
349 self.prefix = prefix
350
351 def get_prefix(self):
352 """Returns the prefix for the node."""
353 return self.prefix
354
355
356def convert(gr, raw_node):
357 """Converts raw node information to a Node or Leaf instance.
358
359 This is passed to the parser driver which calls it whenever a
360 reduction of a grammar rule produces a new complete node, so that
361 the tree is build strictly bottom-up.
362 """
363 type, value, context, children = raw_node
364 if children or type in gr.number2symbol:
365 # If there's exactly one child, return that child instead of
366 # creating a new node.
367 if len(children) == 1:
368 return children[0]
369 return Node(type, children, context=context)
370 else:
371 return Leaf(type, value, context=context)
372
373
374class BasePattern(object):
375
376 """A pattern is a tree matching pattern.
377
378 It looks for a specific node type (token or symbol), and
379 optionally for a specific content.
380
381 This is an abstract base class. There are three concrete
382 subclasses:
383
384 - LeafPattern matches a single leaf node;
385 - NodePattern matches a single node (usually non-leaf);
386 - WildcardPattern matches a sequence of nodes of variable length.
387 """
388
389 # Defaults for instance variables
390 type = None # Node type (token if < 256, symbol if >= 256)
391 content = None # Optional content matching pattern
392 name = None # Optional name used to store match in results dict
393
394 def __new__(cls, *args, **kwds):
395 """Constructor that prevents BasePattern from being instantiated."""
396 assert cls is not BasePattern, "Cannot instantiate BasePattern"
397 return object.__new__(cls)
398
399 def __repr__(self):
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000400 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000401 while args and args[-1] is None:
402 del args[-1]
403 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
404
405 def optimize(self):
406 """A subclass can define this as a hook for optimizations.
407
408 Returns either self or another node with the same effect.
409 """
410 return self
411
412 def match(self, node, results=None):
413 """Does this pattern exactly match a node?
414
415 Returns True if it matches, False if not.
416
417 If results is not None, it must be a dict which will be
418 updated with the nodes matching named subpatterns.
419
420 Default implementation for non-wildcard patterns.
421 """
422 if self.type is not None and node.type != self.type:
423 return False
424 if self.content is not None:
425 r = None
426 if results is not None:
427 r = {}
428 if not self._submatch(node, r):
429 return False
430 if r:
431 results.update(r)
432 if results is not None and self.name:
433 results[self.name] = node
434 return True
435
436 def match_seq(self, nodes, results=None):
437 """Does this pattern exactly match a sequence of nodes?
438
439 Default implementation for non-wildcard patterns.
440 """
441 if len(nodes) != 1:
442 return False
443 return self.match(nodes[0], results)
444
445 def generate_matches(self, nodes):
446 """Generator yielding all matches for this pattern.
447
448 Default implementation for non-wildcard patterns.
449 """
450 r = {}
451 if nodes and self.match(nodes[0], r):
452 yield 1, r
453
454
455class LeafPattern(BasePattern):
456
457 def __init__(self, type=None, content=None, name=None):
458 """Initializer. Takes optional type, content, and name.
459
460 The type, if given must be a token type (< 256). If not given,
461 this matches any *leaf* node; the content may still be required.
462
463 The content, if given, must be a string.
464
465 If a name is given, the matching node is stored in the results
466 dict under that key.
467 """
468 if type is not None:
469 assert 0 <= type < 256, type
470 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000471 assert isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000472 self.type = type
473 self.content = content
474 self.name = name
475
476 def match(self, node, results=None):
477 """Override match() to insist on a leaf node."""
478 if not isinstance(node, Leaf):
479 return False
480 return BasePattern.match(self, node, results)
481
482 def _submatch(self, node, results=None):
483 """Match the pattern's content to the node's children.
484
485 This assumes the node type matches and self.content is not None.
486
487 Returns True if it matches, False if not.
488
489 If results is not None, it must be a dict which will be
490 updated with the nodes matching named subpatterns.
491
492 When returning False, the results dict may still be updated.
493 """
494 return self.content == node.value
495
496
497class NodePattern(BasePattern):
498
499 wildcards = False
500
501 def __init__(self, type=None, content=None, name=None):
502 """Initializer. Takes optional type, content, and name.
503
504 The type, if given, must be a symbol type (>= 256). If the
505 type is None this matches *any* single node (leaf or not),
506 except if content is not None, in which it only matches
507 non-leaf nodes that also match the content pattern.
508
509 The content, if not None, must be a sequence of Patterns that
510 must match the node's children exactly. If the content is
511 given, the type must not be None.
512
513 If a name is given, the matching node is stored in the results
514 dict under that key.
515 """
516 if type is not None:
517 assert type >= 256, type
518 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000519 assert not isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000520 content = list(content)
521 for i, item in enumerate(content):
522 assert isinstance(item, BasePattern), (i, item)
523 if isinstance(item, WildcardPattern):
524 self.wildcards = True
525 self.type = type
526 self.content = content
527 self.name = name
528
529 def _submatch(self, node, results=None):
530 """Match the pattern's content to the node's children.
531
532 This assumes the node type matches and self.content is not None.
533
534 Returns True if it matches, False if not.
535
536 If results is not None, it must be a dict which will be
537 updated with the nodes matching named subpatterns.
538
539 When returning False, the results dict may still be updated.
540 """
541 if self.wildcards:
542 for c, r in generate_matches(self.content, node.children):
543 if c == len(node.children):
544 if results is not None:
545 results.update(r)
546 return True
547 return False
548 if len(self.content) != len(node.children):
549 return False
550 for subpattern, child in zip(self.content, node.children):
551 if not subpattern.match(child, results):
552 return False
553 return True
554
555
556class WildcardPattern(BasePattern):
557
558 """A wildcard pattern can match zero or more nodes.
559
560 This has all the flexibility needed to implement patterns like:
561
562 .* .+ .? .{m,n}
563 (a b c | d e | f)
564 (...)* (...)+ (...)? (...){m,n}
565
566 except it always uses non-greedy matching.
567 """
568
569 def __init__(self, content=None, min=0, max=HUGE, name=None):
570 """Initializer.
571
572 Args:
573 content: optional sequence of subsequences of patterns;
574 if absent, matches one node;
575 if present, each subsequence is an alternative [*]
576 min: optinal minumum number of times to match, default 0
577 max: optional maximum number of times tro match, default HUGE
578 name: optional name assigned to this match
579
580 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
581 equivalent to (a b c | d e | f g h); if content is None,
582 this is equivalent to '.' in regular expression terms.
583 The min and max parameters work as follows:
584 min=0, max=maxint: .*
585 min=1, max=maxint: .+
586 min=0, max=1: .?
587 min=1, max=1: .
588 If content is not None, replace the dot with the parenthesized
589 list of alternatives, e.g. (a b c | d e | f g h)*
590 """
591 assert 0 <= min <= max <= HUGE, (min, max)
592 if content is not None:
593 content = tuple(map(tuple, content)) # Protect against alterations
594 # Check sanity of alternatives
595 assert len(content), repr(content) # Can't have zero alternatives
596 for alt in content:
597 assert len(alt), repr(alt) # Can have empty alternatives
598 self.content = content
599 self.min = min
600 self.max = max
601 self.name = name
602
603 def optimize(self):
604 """Optimize certain stacked wildcard patterns."""
605 subpattern = None
606 if (self.content is not None and
607 len(self.content) == 1 and len(self.content[0]) == 1):
608 subpattern = self.content[0][0]
609 if self.min == 1 and self.max == 1:
610 if self.content is None:
611 return NodePattern(name=self.name)
612 if subpattern is not None and self.name == subpattern.name:
613 return subpattern.optimize()
614 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
615 subpattern.min <= 1 and self.name == subpattern.name):
616 return WildcardPattern(subpattern.content,
617 self.min*subpattern.min,
618 self.max*subpattern.max,
619 subpattern.name)
620 return self
621
622 def match(self, node, results=None):
623 """Does this pattern exactly match a node?"""
624 return self.match_seq([node], results)
625
626 def match_seq(self, nodes, results=None):
627 """Does this pattern exactly match a sequence of nodes?"""
628 for c, r in self.generate_matches(nodes):
629 if c == len(nodes):
630 if results is not None:
631 results.update(r)
632 if self.name:
633 results[self.name] = list(nodes)
634 return True
635 return False
636
637 def generate_matches(self, nodes):
638 """Generator yielding matches for a sequence of nodes.
639
640 Args:
641 nodes: sequence of nodes
642
643 Yields:
644 (count, results) tuples where:
645 count: the match comprises nodes[:count];
646 results: dict containing named submatches.
647 """
648 if self.content is None:
649 # Shortcut for special case (see __init__.__doc__)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000650 for count in range(self.min, 1 + min(len(nodes), self.max)):
Martin v. Löwisef04c442008-03-19 05:04:44 +0000651 r = {}
652 if self.name:
653 r[self.name] = nodes[:count]
654 yield count, r
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000655 elif self.name == "bare_name":
656 yield self._bare_name_matches(nodes)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000657 else:
658 for count, r in self._recursive_matches(nodes, 0):
659 if self.name:
660 r[self.name] = nodes[:count]
661 yield count, r
662
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000663 def _bare_name_matches(self, nodes):
664 """Special optimized matcher for bare_name."""
665 count = 0
666 r = {}
667 done = False
668 max = len(nodes)
669 while not done and count < max:
670 done = True
671 for leaf in self.content:
672 if leaf[0].match(nodes[count], r):
673 count += 1
674 done = False
675 break
676 r[self.name] = nodes[:count]
677 return count, r
678
Martin v. Löwisef04c442008-03-19 05:04:44 +0000679 def _recursive_matches(self, nodes, count):
680 """Helper to recursively yield the matches."""
681 assert self.content is not None
682 if count >= self.min:
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000683 yield 0, {}
Martin v. Löwisef04c442008-03-19 05:04:44 +0000684 if count < self.max:
685 for alt in self.content:
686 for c0, r0 in generate_matches(alt, nodes):
687 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
688 r = {}
689 r.update(r0)
690 r.update(r1)
691 yield c0 + c1, r
692
693
694class NegatedPattern(BasePattern):
695
696 def __init__(self, content=None):
697 """Initializer.
698
699 The argument is either a pattern or None. If it is None, this
700 only matches an empty sequence (effectively '$' in regex
701 lingo). If it is not None, this matches whenever the argument
702 pattern doesn't have any matches.
703 """
704 if content is not None:
705 assert isinstance(content, BasePattern), repr(content)
706 self.content = content
707
708 def match(self, node):
709 # We never match a node in its entirety
710 return False
711
712 def match_seq(self, nodes):
713 # We only match an empty sequence of nodes in its entirety
714 return len(nodes) == 0
715
716 def generate_matches(self, nodes):
717 if self.content is None:
718 # Return a match if there is an empty sequence
719 if len(nodes) == 0:
720 yield 0, {}
721 else:
722 # Return a match if the argument pattern has no matches
723 for c, r in self.content.generate_matches(nodes):
724 return
725 yield 0, {}
726
727
728def generate_matches(patterns, nodes):
729 """Generator yielding matches for a sequence of patterns and nodes.
730
731 Args:
732 patterns: a sequence of patterns
733 nodes: a sequence of nodes
734
735 Yields:
736 (count, results) tuples where:
737 count: the entire sequence of patterns matches nodes[:count];
738 results: dict containing named submatches.
739 """
740 if not patterns:
741 yield 0, {}
742 else:
743 p, rest = patterns[0], patterns[1:]
744 for c0, r0 in p.generate_matches(nodes):
745 if not rest:
746 yield c0, r0
747 else:
748 for c1, r1 in generate_matches(rest, nodes[c0:]):
749 r = {}
750 r.update(r0)
751 r.update(r1)
752 yield c0 + c1, r