blob: 7584f71bde52c1e8049235047731c224f159504b [file] [log] [blame]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Python parse tree definitions.
5
6This is a very concrete parse tree; we need to keep every token and
7even the comments and whitespace between tokens.
8
9There's also a pattern matching implementation here.
10"""
11
12__author__ = "Guido van Rossum <guido@python.org>"
13
14
15HUGE = 0x7FFFFFFF # maximum repeat count, default max
16
17
18class Base(object):
19
20 """Abstract base class for Node and Leaf.
21
22 This provides some default functionality and boilerplate using the
23 template pattern.
24
25 A node may be a subnode of at most one parent.
26 """
27
28 # Default values for instance variables
29 type = None # int: token number (< 256) or symbol number (>= 256)
30 parent = None # Parent node pointer, or None
31 children = () # Tuple of subnodes
32 was_changed = False
33
34 def __new__(cls, *args, **kwds):
35 """Constructor that prevents Base from being instantiated."""
36 assert cls is not Base, "Cannot instantiate Base"
37 return object.__new__(cls)
38
39 def __eq__(self, other):
40 """Compares two nodes for equality.
41
42 This calls the method _eq().
43 """
44 if self.__class__ is not other.__class__:
45 return NotImplemented
46 return self._eq(other)
47
48 def __ne__(self, other):
49 """Compares two nodes for inequality.
50
51 This calls the method _eq().
52 """
53 if self.__class__ is not other.__class__:
54 return NotImplemented
55 return not self._eq(other)
56
57 def _eq(self, other):
58 """Compares two nodes for equality.
59
60 This is called by __eq__ and __ne__. It is only called if the
61 two nodes have the same type. This must be implemented by the
62 concrete subclass. Nodes should be considered equal if they
63 have the same structure, ignoring the prefix string and other
64 context information.
65 """
66 raise NotImplementedError
67
68 def clone(self):
69 """Returns a cloned (deep) copy of self.
70
71 This must be implemented by the concrete subclass.
72 """
73 raise NotImplementedError
74
75 def post_order(self):
76 """Returns a post-order iterator for the tree.
77
78 This must be implemented by the concrete subclass.
79 """
80 raise NotImplementedError
81
82 def pre_order(self):
83 """Returns a pre-order iterator for the tree.
84
85 This must be implemented by the concrete subclass.
86 """
87 raise NotImplementedError
88
89 def set_prefix(self, prefix):
90 """Sets the prefix for the node (see Leaf class).
91
92 This must be implemented by the concrete subclass.
93 """
94 raise NotImplementedError
95
96 def get_prefix(self):
97 """Returns the prefix for the node (see Leaf class).
98
99 This must be implemented by the concrete subclass.
100 """
101 raise NotImplementedError
102
103 def replace(self, new):
104 """Replaces this node with a new one in the parent."""
105 assert self.parent is not None, str(self)
106 assert new is not None
107 if not isinstance(new, list):
108 new = [new]
109 l_children = []
110 found = False
111 for ch in self.parent.children:
112 if ch is self:
113 assert not found, (self.parent.children, self, new)
114 if new is not None:
115 l_children.extend(new)
116 found = True
117 else:
118 l_children.append(ch)
119 assert found, (self.children, self, new)
120 self.parent.changed()
121 self.parent.children = l_children
122 for x in new:
123 x.parent = self.parent
124 self.parent = None
125
126 def get_lineno(self):
127 """Returns the line number which generated the invocant node."""
128 node = self
129 while not isinstance(node, Leaf):
130 if not node.children:
131 return
132 node = node.children[0]
133 return node.lineno
134
135 def changed(self):
136 if self.parent:
137 self.parent.changed()
138 self.was_changed = True
139
140 def remove(self):
141 """Remove the node from the tree. Returns the position of the node
142 in its parent's children before it was removed."""
143 if self.parent:
144 for i, node in enumerate(self.parent.children):
145 if node is self:
146 self.parent.changed()
147 del self.parent.children[i]
148 self.parent = None
149 return i
150
151 def get_next_sibling(self):
152 """Return the node immediately following the invocant in their
153 parent's children list. If the invocant does not have a next
154 sibling, return None."""
155 if self.parent is None:
156 return None
157
158 # Can't use index(); we need to test by identity
159 for i, sibling in enumerate(self.parent.children):
160 if sibling is self:
161 try:
162 return self.parent.children[i+1]
163 except IndexError:
164 return None
165
166 def get_suffix(self):
167 """Return the string immediately following the invocant node. This
168 is effectively equivalent to node.get_next_sibling().get_prefix()"""
169 next_sib = self.get_next_sibling()
170 if next_sib is None:
171 return ""
172 return next_sib.get_prefix()
173
174
175class Node(Base):
176
177 """Concrete implementation for interior nodes."""
178
179 def __init__(self, type, children, context=None, prefix=None):
180 """Initializer.
181
182 Takes a type constant (a symbol number >= 256), a sequence of
183 child nodes, and an optional context keyword argument.
184
185 As a side effect, the parent pointers of the children are updated.
186 """
187 assert type >= 256, type
188 self.type = type
189 self.children = list(children)
190 for ch in self.children:
191 assert ch.parent is None, repr(ch)
192 ch.parent = self
193 if prefix is not None:
194 self.set_prefix(prefix)
195
196 def __repr__(self):
197 """Returns a canonical string representation."""
198 return "%s(%r, %r)" % (self.__class__.__name__,
199 self.type,
200 self.children)
201
202 def __str__(self):
203 """Returns a pretty string representation.
204
205 This reproduces the input source exactly.
206 """
207 return "".join(map(str, self.children))
208
209 def _eq(self, other):
210 """Compares two nodes for equality."""
211 return (self.type, self.children) == (other.type, other.children)
212
213 def clone(self):
214 """Returns a cloned (deep) copy of self."""
215 return Node(self.type, [ch.clone() for ch in self.children])
216
217 def post_order(self):
218 """Returns a post-order iterator for the tree."""
219 for child in self.children:
220 for node in child.post_order():
221 yield node
222 yield self
223
224 def pre_order(self):
225 """Returns a pre-order iterator for the tree."""
226 yield self
227 for child in self.children:
228 for node in child.post_order():
229 yield node
230
231 def set_prefix(self, prefix):
232 """Sets the prefix for the node.
233
234 This passes the responsibility on to the first child.
235 """
236 if self.children:
237 self.children[0].set_prefix(prefix)
238
239 def get_prefix(self):
240 """Returns the prefix for the node.
241
242 This passes the call on to the first child.
243 """
244 if not self.children:
245 return ""
246 return self.children[0].get_prefix()
247
248 def set_child(self, i, child):
249 """Equivalent to 'node.children[i] = child'. This method also sets the
250 child's parent attribute appropriately."""
251 child.parent = self
252 self.children[i].parent = None
253 self.children[i] = child
254
255 def insert_child(self, i, child):
256 """Equivalent to 'node.children.insert(i, child)'. This method also
257 sets the child's parent attribute appropriately."""
258 child.parent = self
259 self.children.insert(i, child)
260
261 def append_child(self, child):
262 """Equivalent to 'node.children.append(child)'. This method also
263 sets the child's parent attribute appropriately."""
264 child.parent = self
265 self.children.append(child)
266
267
268class Leaf(Base):
269
270 """Concrete implementation for leaf nodes."""
271
272 # Default values for instance variables
273 prefix = "" # Whitespace and comments preceding this token in the input
274 lineno = 0 # Line where this token starts in the input
275 column = 0 # Column where this token tarts in the input
276
277 def __init__(self, type, value, context=None, prefix=None):
278 """Initializer.
279
280 Takes a type constant (a token number < 256), a string value,
281 and an optional context keyword argument.
282 """
283 assert 0 <= type < 256, type
284 if context is not None:
285 self.prefix, (self.lineno, self.column) = context
286 self.type = type
287 self.value = value
288 if prefix is not None:
289 self.prefix = prefix
290
291 def __repr__(self):
292 """Returns a canonical string representation."""
293 return "%s(%r, %r)" % (self.__class__.__name__,
294 self.type,
295 self.value)
296
297 def __str__(self):
298 """Returns a pretty string representation.
299
300 This reproduces the input source exactly.
301 """
302 return self.prefix + str(self.value)
303
304 def _eq(self, other):
305 """Compares two nodes for equality."""
306 return (self.type, self.value) == (other.type, other.value)
307
308 def clone(self):
309 """Returns a cloned (deep) copy of self."""
310 return Leaf(self.type, self.value,
311 (self.prefix, (self.lineno, self.column)))
312
313 def post_order(self):
314 """Returns a post-order iterator for the tree."""
315 yield self
316
317 def pre_order(self):
318 """Returns a pre-order iterator for the tree."""
319 yield self
320
321 def set_prefix(self, prefix):
322 """Sets the prefix for the node."""
323 self.changed()
324 self.prefix = prefix
325
326 def get_prefix(self):
327 """Returns the prefix for the node."""
328 return self.prefix
329
330
331def convert(gr, raw_node):
332 """Converts raw node information to a Node or Leaf instance.
333
334 This is passed to the parser driver which calls it whenever a
335 reduction of a grammar rule produces a new complete node, so that
336 the tree is build strictly bottom-up.
337 """
338 type, value, context, children = raw_node
339 if children or type in gr.number2symbol:
340 # If there's exactly one child, return that child instead of
341 # creating a new node.
342 if len(children) == 1:
343 return children[0]
344 return Node(type, children, context=context)
345 else:
346 return Leaf(type, value, context=context)
347
348
349class BasePattern(object):
350
351 """A pattern is a tree matching pattern.
352
353 It looks for a specific node type (token or symbol), and
354 optionally for a specific content.
355
356 This is an abstract base class. There are three concrete
357 subclasses:
358
359 - LeafPattern matches a single leaf node;
360 - NodePattern matches a single node (usually non-leaf);
361 - WildcardPattern matches a sequence of nodes of variable length.
362 """
363
364 # Defaults for instance variables
365 type = None # Node type (token if < 256, symbol if >= 256)
366 content = None # Optional content matching pattern
367 name = None # Optional name used to store match in results dict
368
369 def __new__(cls, *args, **kwds):
370 """Constructor that prevents BasePattern from being instantiated."""
371 assert cls is not BasePattern, "Cannot instantiate BasePattern"
372 return object.__new__(cls)
373
374 def __repr__(self):
375 args = [self.type, self.content, self.name]
376 while args and args[-1] is None:
377 del args[-1]
378 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
379
380 def optimize(self):
381 """A subclass can define this as a hook for optimizations.
382
383 Returns either self or another node with the same effect.
384 """
385 return self
386
387 def match(self, node, results=None):
388 """Does this pattern exactly match a node?
389
390 Returns True if it matches, False if not.
391
392 If results is not None, it must be a dict which will be
393 updated with the nodes matching named subpatterns.
394
395 Default implementation for non-wildcard patterns.
396 """
397 if self.type is not None and node.type != self.type:
398 return False
399 if self.content is not None:
400 r = None
401 if results is not None:
402 r = {}
403 if not self._submatch(node, r):
404 return False
405 if r:
406 results.update(r)
407 if results is not None and self.name:
408 results[self.name] = node
409 return True
410
411 def match_seq(self, nodes, results=None):
412 """Does this pattern exactly match a sequence of nodes?
413
414 Default implementation for non-wildcard patterns.
415 """
416 if len(nodes) != 1:
417 return False
418 return self.match(nodes[0], results)
419
420 def generate_matches(self, nodes):
421 """Generator yielding all matches for this pattern.
422
423 Default implementation for non-wildcard patterns.
424 """
425 r = {}
426 if nodes and self.match(nodes[0], r):
427 yield 1, r
428
429
430class LeafPattern(BasePattern):
431
432 def __init__(self, type=None, content=None, name=None):
433 """Initializer. Takes optional type, content, and name.
434
435 The type, if given must be a token type (< 256). If not given,
436 this matches any *leaf* node; the content may still be required.
437
438 The content, if given, must be a string.
439
440 If a name is given, the matching node is stored in the results
441 dict under that key.
442 """
443 if type is not None:
444 assert 0 <= type < 256, type
445 if content is not None:
446 assert isinstance(content, basestring), repr(content)
447 self.type = type
448 self.content = content
449 self.name = name
450
451 def match(self, node, results=None):
452 """Override match() to insist on a leaf node."""
453 if not isinstance(node, Leaf):
454 return False
455 return BasePattern.match(self, node, results)
456
457 def _submatch(self, node, results=None):
458 """Match the pattern's content to the node's children.
459
460 This assumes the node type matches and self.content is not None.
461
462 Returns True if it matches, False if not.
463
464 If results is not None, it must be a dict which will be
465 updated with the nodes matching named subpatterns.
466
467 When returning False, the results dict may still be updated.
468 """
469 return self.content == node.value
470
471
472class NodePattern(BasePattern):
473
474 wildcards = False
475
476 def __init__(self, type=None, content=None, name=None):
477 """Initializer. Takes optional type, content, and name.
478
479 The type, if given, must be a symbol type (>= 256). If the
480 type is None this matches *any* single node (leaf or not),
481 except if content is not None, in which it only matches
482 non-leaf nodes that also match the content pattern.
483
484 The content, if not None, must be a sequence of Patterns that
485 must match the node's children exactly. If the content is
486 given, the type must not be None.
487
488 If a name is given, the matching node is stored in the results
489 dict under that key.
490 """
491 if type is not None:
492 assert type >= 256, type
493 if content is not None:
494 assert not isinstance(content, basestring), repr(content)
495 content = list(content)
496 for i, item in enumerate(content):
497 assert isinstance(item, BasePattern), (i, item)
498 if isinstance(item, WildcardPattern):
499 self.wildcards = True
500 self.type = type
501 self.content = content
502 self.name = name
503
504 def _submatch(self, node, results=None):
505 """Match the pattern's content to the node's children.
506
507 This assumes the node type matches and self.content is not None.
508
509 Returns True if it matches, False if not.
510
511 If results is not None, it must be a dict which will be
512 updated with the nodes matching named subpatterns.
513
514 When returning False, the results dict may still be updated.
515 """
516 if self.wildcards:
517 for c, r in generate_matches(self.content, node.children):
518 if c == len(node.children):
519 if results is not None:
520 results.update(r)
521 return True
522 return False
523 if len(self.content) != len(node.children):
524 return False
525 for subpattern, child in zip(self.content, node.children):
526 if not subpattern.match(child, results):
527 return False
528 return True
529
530
531class WildcardPattern(BasePattern):
532
533 """A wildcard pattern can match zero or more nodes.
534
535 This has all the flexibility needed to implement patterns like:
536
537 .* .+ .? .{m,n}
538 (a b c | d e | f)
539 (...)* (...)+ (...)? (...){m,n}
540
541 except it always uses non-greedy matching.
542 """
543
544 def __init__(self, content=None, min=0, max=HUGE, name=None):
545 """Initializer.
546
547 Args:
548 content: optional sequence of subsequences of patterns;
549 if absent, matches one node;
550 if present, each subsequence is an alternative [*]
551 min: optinal minumum number of times to match, default 0
552 max: optional maximum number of times tro match, default HUGE
553 name: optional name assigned to this match
554
555 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
556 equivalent to (a b c | d e | f g h); if content is None,
557 this is equivalent to '.' in regular expression terms.
558 The min and max parameters work as follows:
559 min=0, max=maxint: .*
560 min=1, max=maxint: .+
561 min=0, max=1: .?
562 min=1, max=1: .
563 If content is not None, replace the dot with the parenthesized
564 list of alternatives, e.g. (a b c | d e | f g h)*
565 """
566 assert 0 <= min <= max <= HUGE, (min, max)
567 if content is not None:
568 content = tuple(map(tuple, content)) # Protect against alterations
569 # Check sanity of alternatives
570 assert len(content), repr(content) # Can't have zero alternatives
571 for alt in content:
572 assert len(alt), repr(alt) # Can have empty alternatives
573 self.content = content
574 self.min = min
575 self.max = max
576 self.name = name
577
578 def optimize(self):
579 """Optimize certain stacked wildcard patterns."""
580 subpattern = None
581 if (self.content is not None and
582 len(self.content) == 1 and len(self.content[0]) == 1):
583 subpattern = self.content[0][0]
584 if self.min == 1 and self.max == 1:
585 if self.content is None:
586 return NodePattern(name=self.name)
587 if subpattern is not None and self.name == subpattern.name:
588 return subpattern.optimize()
589 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
590 subpattern.min <= 1 and self.name == subpattern.name):
591 return WildcardPattern(subpattern.content,
592 self.min*subpattern.min,
593 self.max*subpattern.max,
594 subpattern.name)
595 return self
596
597 def match(self, node, results=None):
598 """Does this pattern exactly match a node?"""
599 return self.match_seq([node], results)
600
601 def match_seq(self, nodes, results=None):
602 """Does this pattern exactly match a sequence of nodes?"""
603 for c, r in self.generate_matches(nodes):
604 if c == len(nodes):
605 if results is not None:
606 results.update(r)
607 if self.name:
608 results[self.name] = list(nodes)
609 return True
610 return False
611
612 def generate_matches(self, nodes):
613 """Generator yielding matches for a sequence of nodes.
614
615 Args:
616 nodes: sequence of nodes
617
618 Yields:
619 (count, results) tuples where:
620 count: the match comprises nodes[:count];
621 results: dict containing named submatches.
622 """
623 if self.content is None:
624 # Shortcut for special case (see __init__.__doc__)
625 for count in xrange(self.min, 1 + min(len(nodes), self.max)):
626 r = {}
627 if self.name:
628 r[self.name] = nodes[:count]
629 yield count, r
630 else:
631 for count, r in self._recursive_matches(nodes, 0):
632 if self.name:
633 r[self.name] = nodes[:count]
634 yield count, r
635
636 def _recursive_matches(self, nodes, count):
637 """Helper to recursively yield the matches."""
638 assert self.content is not None
639 if count >= self.min:
640 r = {}
641 if self.name:
642 r[self.name] = nodes[:0]
643 yield 0, r
644 if count < self.max:
645 for alt in self.content:
646 for c0, r0 in generate_matches(alt, nodes):
647 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
648 r = {}
649 r.update(r0)
650 r.update(r1)
651 yield c0 + c1, r
652
653
654class NegatedPattern(BasePattern):
655
656 def __init__(self, content=None):
657 """Initializer.
658
659 The argument is either a pattern or None. If it is None, this
660 only matches an empty sequence (effectively '$' in regex
661 lingo). If it is not None, this matches whenever the argument
662 pattern doesn't have any matches.
663 """
664 if content is not None:
665 assert isinstance(content, BasePattern), repr(content)
666 self.content = content
667
668 def match(self, node):
669 # We never match a node in its entirety
670 return False
671
672 def match_seq(self, nodes):
673 # We only match an empty sequence of nodes in its entirety
674 return len(nodes) == 0
675
676 def generate_matches(self, nodes):
677 if self.content is None:
678 # Return a match if there is an empty sequence
679 if len(nodes) == 0:
680 yield 0, {}
681 else:
682 # Return a match if the argument pattern has no matches
683 for c, r in self.content.generate_matches(nodes):
684 return
685 yield 0, {}
686
687
688def generate_matches(patterns, nodes):
689 """Generator yielding matches for a sequence of patterns and nodes.
690
691 Args:
692 patterns: a sequence of patterns
693 nodes: a sequence of nodes
694
695 Yields:
696 (count, results) tuples where:
697 count: the entire sequence of patterns matches nodes[:count];
698 results: dict containing named submatches.
699 """
700 if not patterns:
701 yield 0, {}
702 else:
703 p, rest = patterns[0], patterns[1:]
704 for c0, r0 in p.generate_matches(nodes):
705 if not rest:
706 yield c0, r0
707 else:
708 for c1, r1 in generate_matches(rest, nodes[c0:]):
709 r = {}
710 r.update(r0)
711 r.update(r1)
712 yield c0 + c1, r