blob: 8b6780afdb1bf2ef8af878b4c9c15cd1adf793f6 [file] [log] [blame]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Python parse tree definitions.
5
6This is a very concrete parse tree; we need to keep every token and
7even the comments and whitespace between tokens.
8
9There's also a pattern matching implementation here.
10"""
11
12__author__ = "Guido van Rossum <guido@python.org>"
13
14
15HUGE = 0x7FFFFFFF # maximum repeat count, default max
16
Martin v. Löwisbaf267c2008-03-22 00:01:12 +000017_type_reprs = {}
18def type_repr(type_num):
19 global _type_reprs
20 if not _type_reprs:
21 from .pygram import python_symbols
22 # printing tokens is possible but not as useful
23 # from .pgen2 import token // token.__dict__.items():
24 for name, val in python_symbols.__dict__.items():
25 if type(val) == int: _type_reprs[val] = name
26 return _type_reprs.setdefault(type_num, type_num)
27
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000028
29class Base(object):
30
31 """Abstract base class for Node and Leaf.
32
33 This provides some default functionality and boilerplate using the
34 template pattern.
35
36 A node may be a subnode of at most one parent.
37 """
38
39 # Default values for instance variables
40 type = None # int: token number (< 256) or symbol number (>= 256)
41 parent = None # Parent node pointer, or None
42 children = () # Tuple of subnodes
43 was_changed = False
44
45 def __new__(cls, *args, **kwds):
46 """Constructor that prevents Base from being instantiated."""
47 assert cls is not Base, "Cannot instantiate Base"
48 return object.__new__(cls)
49
50 def __eq__(self, other):
51 """Compares two nodes for equality.
52
53 This calls the method _eq().
54 """
55 if self.__class__ is not other.__class__:
56 return NotImplemented
57 return self._eq(other)
58
59 def __ne__(self, other):
60 """Compares two nodes for inequality.
61
62 This calls the method _eq().
63 """
64 if self.__class__ is not other.__class__:
65 return NotImplemented
66 return not self._eq(other)
67
68 def _eq(self, other):
69 """Compares two nodes for equality.
70
71 This is called by __eq__ and __ne__. It is only called if the
72 two nodes have the same type. This must be implemented by the
73 concrete subclass. Nodes should be considered equal if they
74 have the same structure, ignoring the prefix string and other
75 context information.
76 """
77 raise NotImplementedError
78
79 def clone(self):
80 """Returns a cloned (deep) copy of self.
81
82 This must be implemented by the concrete subclass.
83 """
84 raise NotImplementedError
85
86 def post_order(self):
87 """Returns a post-order iterator for the tree.
88
89 This must be implemented by the concrete subclass.
90 """
91 raise NotImplementedError
92
93 def pre_order(self):
94 """Returns a pre-order iterator for the tree.
95
96 This must be implemented by the concrete subclass.
97 """
98 raise NotImplementedError
99
100 def set_prefix(self, prefix):
101 """Sets the prefix for the node (see Leaf class).
102
103 This must be implemented by the concrete subclass.
104 """
105 raise NotImplementedError
106
107 def get_prefix(self):
108 """Returns the prefix for the node (see Leaf class).
109
110 This must be implemented by the concrete subclass.
111 """
112 raise NotImplementedError
113
114 def replace(self, new):
115 """Replaces this node with a new one in the parent."""
116 assert self.parent is not None, str(self)
117 assert new is not None
118 if not isinstance(new, list):
119 new = [new]
120 l_children = []
121 found = False
122 for ch in self.parent.children:
123 if ch is self:
124 assert not found, (self.parent.children, self, new)
125 if new is not None:
126 l_children.extend(new)
127 found = True
128 else:
129 l_children.append(ch)
130 assert found, (self.children, self, new)
131 self.parent.changed()
132 self.parent.children = l_children
133 for x in new:
134 x.parent = self.parent
135 self.parent = None
136
137 def get_lineno(self):
138 """Returns the line number which generated the invocant node."""
139 node = self
140 while not isinstance(node, Leaf):
141 if not node.children:
142 return
143 node = node.children[0]
144 return node.lineno
145
146 def changed(self):
147 if self.parent:
148 self.parent.changed()
149 self.was_changed = True
150
151 def remove(self):
152 """Remove the node from the tree. Returns the position of the node
153 in its parent's children before it was removed."""
154 if self.parent:
155 for i, node in enumerate(self.parent.children):
156 if node is self:
157 self.parent.changed()
158 del self.parent.children[i]
159 self.parent = None
160 return i
161
162 def get_next_sibling(self):
163 """Return the node immediately following the invocant in their
164 parent's children list. If the invocant does not have a next
165 sibling, return None."""
166 if self.parent is None:
167 return None
168
169 # Can't use index(); we need to test by identity
170 for i, sibling in enumerate(self.parent.children):
171 if sibling is self:
172 try:
173 return self.parent.children[i+1]
174 except IndexError:
175 return None
176
177 def get_suffix(self):
178 """Return the string immediately following the invocant node. This
179 is effectively equivalent to node.get_next_sibling().get_prefix()"""
180 next_sib = self.get_next_sibling()
181 if next_sib is None:
182 return ""
183 return next_sib.get_prefix()
184
185
186class Node(Base):
187
188 """Concrete implementation for interior nodes."""
189
190 def __init__(self, type, children, context=None, prefix=None):
191 """Initializer.
192
193 Takes a type constant (a symbol number >= 256), a sequence of
194 child nodes, and an optional context keyword argument.
195
196 As a side effect, the parent pointers of the children are updated.
197 """
198 assert type >= 256, type
199 self.type = type
200 self.children = list(children)
201 for ch in self.children:
202 assert ch.parent is None, repr(ch)
203 ch.parent = self
204 if prefix is not None:
205 self.set_prefix(prefix)
206
207 def __repr__(self):
208 """Returns a canonical string representation."""
Martin v. Löwisbaf267c2008-03-22 00:01:12 +0000209 return "%s(%s, %r)" % (self.__class__.__name__,
210 type_repr(self.type),
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000211 self.children)
212
213 def __str__(self):
214 """Returns a pretty string representation.
215
216 This reproduces the input source exactly.
217 """
218 return "".join(map(str, self.children))
219
220 def _eq(self, other):
221 """Compares two nodes for equality."""
222 return (self.type, self.children) == (other.type, other.children)
223
224 def clone(self):
225 """Returns a cloned (deep) copy of self."""
226 return Node(self.type, [ch.clone() for ch in self.children])
227
228 def post_order(self):
229 """Returns a post-order iterator for the tree."""
230 for child in self.children:
231 for node in child.post_order():
232 yield node
233 yield self
234
235 def pre_order(self):
236 """Returns a pre-order iterator for the tree."""
237 yield self
238 for child in self.children:
239 for node in child.post_order():
240 yield node
241
242 def set_prefix(self, prefix):
243 """Sets the prefix for the node.
244
245 This passes the responsibility on to the first child.
246 """
247 if self.children:
248 self.children[0].set_prefix(prefix)
249
250 def get_prefix(self):
251 """Returns the prefix for the node.
252
253 This passes the call on to the first child.
254 """
255 if not self.children:
256 return ""
257 return self.children[0].get_prefix()
258
259 def set_child(self, i, child):
260 """Equivalent to 'node.children[i] = child'. This method also sets the
261 child's parent attribute appropriately."""
262 child.parent = self
263 self.children[i].parent = None
264 self.children[i] = child
265
266 def insert_child(self, i, child):
267 """Equivalent to 'node.children.insert(i, child)'. This method also
268 sets the child's parent attribute appropriately."""
269 child.parent = self
270 self.children.insert(i, child)
271
272 def append_child(self, child):
273 """Equivalent to 'node.children.append(child)'. This method also
274 sets the child's parent attribute appropriately."""
275 child.parent = self
276 self.children.append(child)
277
278
279class Leaf(Base):
280
281 """Concrete implementation for leaf nodes."""
282
283 # Default values for instance variables
284 prefix = "" # Whitespace and comments preceding this token in the input
285 lineno = 0 # Line where this token starts in the input
286 column = 0 # Column where this token tarts in the input
287
288 def __init__(self, type, value, context=None, prefix=None):
289 """Initializer.
290
291 Takes a type constant (a token number < 256), a string value,
292 and an optional context keyword argument.
293 """
294 assert 0 <= type < 256, type
295 if context is not None:
296 self.prefix, (self.lineno, self.column) = context
297 self.type = type
298 self.value = value
299 if prefix is not None:
300 self.prefix = prefix
301
302 def __repr__(self):
303 """Returns a canonical string representation."""
304 return "%s(%r, %r)" % (self.__class__.__name__,
305 self.type,
306 self.value)
307
308 def __str__(self):
309 """Returns a pretty string representation.
310
311 This reproduces the input source exactly.
312 """
313 return self.prefix + str(self.value)
314
315 def _eq(self, other):
316 """Compares two nodes for equality."""
317 return (self.type, self.value) == (other.type, other.value)
318
319 def clone(self):
320 """Returns a cloned (deep) copy of self."""
321 return Leaf(self.type, self.value,
322 (self.prefix, (self.lineno, self.column)))
323
324 def post_order(self):
325 """Returns a post-order iterator for the tree."""
326 yield self
327
328 def pre_order(self):
329 """Returns a pre-order iterator for the tree."""
330 yield self
331
332 def set_prefix(self, prefix):
333 """Sets the prefix for the node."""
334 self.changed()
335 self.prefix = prefix
336
337 def get_prefix(self):
338 """Returns the prefix for the node."""
339 return self.prefix
340
341
342def convert(gr, raw_node):
343 """Converts raw node information to a Node or Leaf instance.
344
345 This is passed to the parser driver which calls it whenever a
346 reduction of a grammar rule produces a new complete node, so that
347 the tree is build strictly bottom-up.
348 """
349 type, value, context, children = raw_node
350 if children or type in gr.number2symbol:
351 # If there's exactly one child, return that child instead of
352 # creating a new node.
353 if len(children) == 1:
354 return children[0]
355 return Node(type, children, context=context)
356 else:
357 return Leaf(type, value, context=context)
358
359
360class BasePattern(object):
361
362 """A pattern is a tree matching pattern.
363
364 It looks for a specific node type (token or symbol), and
365 optionally for a specific content.
366
367 This is an abstract base class. There are three concrete
368 subclasses:
369
370 - LeafPattern matches a single leaf node;
371 - NodePattern matches a single node (usually non-leaf);
372 - WildcardPattern matches a sequence of nodes of variable length.
373 """
374
375 # Defaults for instance variables
376 type = None # Node type (token if < 256, symbol if >= 256)
377 content = None # Optional content matching pattern
378 name = None # Optional name used to store match in results dict
379
380 def __new__(cls, *args, **kwds):
381 """Constructor that prevents BasePattern from being instantiated."""
382 assert cls is not BasePattern, "Cannot instantiate BasePattern"
383 return object.__new__(cls)
384
385 def __repr__(self):
Martin v. Löwisbaf267c2008-03-22 00:01:12 +0000386 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000387 while args and args[-1] is None:
388 del args[-1]
389 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
390
391 def optimize(self):
392 """A subclass can define this as a hook for optimizations.
393
394 Returns either self or another node with the same effect.
395 """
396 return self
397
398 def match(self, node, results=None):
399 """Does this pattern exactly match a node?
400
401 Returns True if it matches, False if not.
402
403 If results is not None, it must be a dict which will be
404 updated with the nodes matching named subpatterns.
405
406 Default implementation for non-wildcard patterns.
407 """
408 if self.type is not None and node.type != self.type:
409 return False
410 if self.content is not None:
411 r = None
412 if results is not None:
413 r = {}
414 if not self._submatch(node, r):
415 return False
416 if r:
417 results.update(r)
418 if results is not None and self.name:
419 results[self.name] = node
420 return True
421
422 def match_seq(self, nodes, results=None):
423 """Does this pattern exactly match a sequence of nodes?
424
425 Default implementation for non-wildcard patterns.
426 """
427 if len(nodes) != 1:
428 return False
429 return self.match(nodes[0], results)
430
431 def generate_matches(self, nodes):
432 """Generator yielding all matches for this pattern.
433
434 Default implementation for non-wildcard patterns.
435 """
436 r = {}
437 if nodes and self.match(nodes[0], r):
438 yield 1, r
439
440
441class LeafPattern(BasePattern):
442
443 def __init__(self, type=None, content=None, name=None):
444 """Initializer. Takes optional type, content, and name.
445
446 The type, if given must be a token type (< 256). If not given,
447 this matches any *leaf* node; the content may still be required.
448
449 The content, if given, must be a string.
450
451 If a name is given, the matching node is stored in the results
452 dict under that key.
453 """
454 if type is not None:
455 assert 0 <= type < 256, type
456 if content is not None:
457 assert isinstance(content, basestring), repr(content)
458 self.type = type
459 self.content = content
460 self.name = name
461
462 def match(self, node, results=None):
463 """Override match() to insist on a leaf node."""
464 if not isinstance(node, Leaf):
465 return False
466 return BasePattern.match(self, node, results)
467
468 def _submatch(self, node, results=None):
469 """Match the pattern's content to the node's children.
470
471 This assumes the node type matches and self.content is not None.
472
473 Returns True if it matches, False if not.
474
475 If results is not None, it must be a dict which will be
476 updated with the nodes matching named subpatterns.
477
478 When returning False, the results dict may still be updated.
479 """
480 return self.content == node.value
481
482
483class NodePattern(BasePattern):
484
485 wildcards = False
486
487 def __init__(self, type=None, content=None, name=None):
488 """Initializer. Takes optional type, content, and name.
489
490 The type, if given, must be a symbol type (>= 256). If the
491 type is None this matches *any* single node (leaf or not),
492 except if content is not None, in which it only matches
493 non-leaf nodes that also match the content pattern.
494
495 The content, if not None, must be a sequence of Patterns that
496 must match the node's children exactly. If the content is
497 given, the type must not be None.
498
499 If a name is given, the matching node is stored in the results
500 dict under that key.
501 """
502 if type is not None:
503 assert type >= 256, type
504 if content is not None:
505 assert not isinstance(content, basestring), repr(content)
506 content = list(content)
507 for i, item in enumerate(content):
508 assert isinstance(item, BasePattern), (i, item)
509 if isinstance(item, WildcardPattern):
510 self.wildcards = True
511 self.type = type
512 self.content = content
513 self.name = name
514
515 def _submatch(self, node, results=None):
516 """Match the pattern's content to the node's children.
517
518 This assumes the node type matches and self.content is not None.
519
520 Returns True if it matches, False if not.
521
522 If results is not None, it must be a dict which will be
523 updated with the nodes matching named subpatterns.
524
525 When returning False, the results dict may still be updated.
526 """
527 if self.wildcards:
528 for c, r in generate_matches(self.content, node.children):
529 if c == len(node.children):
530 if results is not None:
531 results.update(r)
532 return True
533 return False
534 if len(self.content) != len(node.children):
535 return False
536 for subpattern, child in zip(self.content, node.children):
537 if not subpattern.match(child, results):
538 return False
539 return True
540
541
542class WildcardPattern(BasePattern):
543
544 """A wildcard pattern can match zero or more nodes.
545
546 This has all the flexibility needed to implement patterns like:
547
548 .* .+ .? .{m,n}
549 (a b c | d e | f)
550 (...)* (...)+ (...)? (...){m,n}
551
552 except it always uses non-greedy matching.
553 """
554
555 def __init__(self, content=None, min=0, max=HUGE, name=None):
556 """Initializer.
557
558 Args:
559 content: optional sequence of subsequences of patterns;
560 if absent, matches one node;
561 if present, each subsequence is an alternative [*]
562 min: optinal minumum number of times to match, default 0
563 max: optional maximum number of times tro match, default HUGE
564 name: optional name assigned to this match
565
566 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
567 equivalent to (a b c | d e | f g h); if content is None,
568 this is equivalent to '.' in regular expression terms.
569 The min and max parameters work as follows:
570 min=0, max=maxint: .*
571 min=1, max=maxint: .+
572 min=0, max=1: .?
573 min=1, max=1: .
574 If content is not None, replace the dot with the parenthesized
575 list of alternatives, e.g. (a b c | d e | f g h)*
576 """
577 assert 0 <= min <= max <= HUGE, (min, max)
578 if content is not None:
579 content = tuple(map(tuple, content)) # Protect against alterations
580 # Check sanity of alternatives
581 assert len(content), repr(content) # Can't have zero alternatives
582 for alt in content:
583 assert len(alt), repr(alt) # Can have empty alternatives
584 self.content = content
585 self.min = min
586 self.max = max
587 self.name = name
588
589 def optimize(self):
590 """Optimize certain stacked wildcard patterns."""
591 subpattern = None
592 if (self.content is not None and
593 len(self.content) == 1 and len(self.content[0]) == 1):
594 subpattern = self.content[0][0]
595 if self.min == 1 and self.max == 1:
596 if self.content is None:
597 return NodePattern(name=self.name)
598 if subpattern is not None and self.name == subpattern.name:
599 return subpattern.optimize()
600 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
601 subpattern.min <= 1 and self.name == subpattern.name):
602 return WildcardPattern(subpattern.content,
603 self.min*subpattern.min,
604 self.max*subpattern.max,
605 subpattern.name)
606 return self
607
608 def match(self, node, results=None):
609 """Does this pattern exactly match a node?"""
610 return self.match_seq([node], results)
611
612 def match_seq(self, nodes, results=None):
613 """Does this pattern exactly match a sequence of nodes?"""
614 for c, r in self.generate_matches(nodes):
615 if c == len(nodes):
616 if results is not None:
617 results.update(r)
618 if self.name:
619 results[self.name] = list(nodes)
620 return True
621 return False
622
623 def generate_matches(self, nodes):
624 """Generator yielding matches for a sequence of nodes.
625
626 Args:
627 nodes: sequence of nodes
628
629 Yields:
630 (count, results) tuples where:
631 count: the match comprises nodes[:count];
632 results: dict containing named submatches.
633 """
634 if self.content is None:
635 # Shortcut for special case (see __init__.__doc__)
636 for count in xrange(self.min, 1 + min(len(nodes), self.max)):
637 r = {}
638 if self.name:
639 r[self.name] = nodes[:count]
640 yield count, r
641 else:
642 for count, r in self._recursive_matches(nodes, 0):
643 if self.name:
644 r[self.name] = nodes[:count]
645 yield count, r
646
647 def _recursive_matches(self, nodes, count):
648 """Helper to recursively yield the matches."""
649 assert self.content is not None
650 if count >= self.min:
651 r = {}
652 if self.name:
653 r[self.name] = nodes[:0]
654 yield 0, r
655 if count < self.max:
656 for alt in self.content:
657 for c0, r0 in generate_matches(alt, nodes):
658 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
659 r = {}
660 r.update(r0)
661 r.update(r1)
662 yield c0 + c1, r
663
664
665class NegatedPattern(BasePattern):
666
667 def __init__(self, content=None):
668 """Initializer.
669
670 The argument is either a pattern or None. If it is None, this
671 only matches an empty sequence (effectively '$' in regex
672 lingo). If it is not None, this matches whenever the argument
673 pattern doesn't have any matches.
674 """
675 if content is not None:
676 assert isinstance(content, BasePattern), repr(content)
677 self.content = content
678
679 def match(self, node):
680 # We never match a node in its entirety
681 return False
682
683 def match_seq(self, nodes):
684 # We only match an empty sequence of nodes in its entirety
685 return len(nodes) == 0
686
687 def generate_matches(self, nodes):
688 if self.content is None:
689 # Return a match if there is an empty sequence
690 if len(nodes) == 0:
691 yield 0, {}
692 else:
693 # Return a match if the argument pattern has no matches
694 for c, r in self.content.generate_matches(nodes):
695 return
696 yield 0, {}
697
698
699def generate_matches(patterns, nodes):
700 """Generator yielding matches for a sequence of patterns and nodes.
701
702 Args:
703 patterns: a sequence of patterns
704 nodes: a sequence of nodes
705
706 Yields:
707 (count, results) tuples where:
708 count: the entire sequence of patterns matches nodes[:count];
709 results: dict containing named submatches.
710 """
711 if not patterns:
712 yield 0, {}
713 else:
714 p, rest = patterns[0], patterns[1:]
715 for c0, r0 in p.generate_matches(nodes):
716 if not rest:
717 yield c0, r0
718 else:
719 for c1, r1 in generate_matches(rest, nodes[c0:]):
720 r = {}
721 r.update(r0)
722 r.update(r1)
723 yield c0 + c1, r