blob: 3a36da283bb9c92ddb9f01316a6e8e79a4948a30 [file] [log] [blame]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +00004"""
5Python parse tree definitions.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +00006
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
Benjamin Peterson2555d9d2008-11-28 22:12:14 +000015import sys
16from StringIO import StringIO
17
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000018
19HUGE = 0x7FFFFFFF # maximum repeat count, default max
20
Martin v. Löwisbaf267c2008-03-22 00:01:12 +000021_type_reprs = {}
22def type_repr(type_num):
23 global _type_reprs
24 if not _type_reprs:
25 from .pygram import python_symbols
26 # printing tokens is possible but not as useful
27 # from .pgen2 import token // token.__dict__.items():
28 for name, val in python_symbols.__dict__.items():
29 if type(val) == int: _type_reprs[val] = name
30 return _type_reprs.setdefault(type_num, type_num)
31
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000032
33class Base(object):
34
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000035 """
36 Abstract base class for Node and Leaf.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000037
38 This provides some default functionality and boilerplate using the
39 template pattern.
40
41 A node may be a subnode of at most one parent.
42 """
43
44 # Default values for instance variables
45 type = None # int: token number (< 256) or symbol number (>= 256)
46 parent = None # Parent node pointer, or None
47 children = () # Tuple of subnodes
48 was_changed = False
49
50 def __new__(cls, *args, **kwds):
51 """Constructor that prevents Base from being instantiated."""
52 assert cls is not Base, "Cannot instantiate Base"
53 return object.__new__(cls)
54
55 def __eq__(self, other):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000056 """
57 Compare two nodes for equality.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000058
59 This calls the method _eq().
60 """
61 if self.__class__ is not other.__class__:
62 return NotImplemented
63 return self._eq(other)
64
65 def __ne__(self, other):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000066 """
67 Compare two nodes for inequality.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000068
69 This calls the method _eq().
70 """
71 if self.__class__ is not other.__class__:
72 return NotImplemented
73 return not self._eq(other)
74
75 def _eq(self, other):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000076 """
77 Compare two nodes for equality.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000078
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000079 This is called by __eq__ and __ne__. It is only called if the two nodes
80 have the same type. This must be implemented by the concrete subclass.
81 Nodes should be considered equal if they have the same structure,
82 ignoring the prefix string and other context information.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000083 """
84 raise NotImplementedError
85
86 def clone(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000087 """
88 Return a cloned (deep) copy of self.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000089
90 This must be implemented by the concrete subclass.
91 """
92 raise NotImplementedError
93
94 def post_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000095 """
96 Return a post-order iterator for the tree.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000097
98 This must be implemented by the concrete subclass.
99 """
100 raise NotImplementedError
101
102 def pre_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000103 """
104 Return a pre-order iterator for the tree.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000105
106 This must be implemented by the concrete subclass.
107 """
108 raise NotImplementedError
109
110 def set_prefix(self, prefix):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000111 """
112 Set the prefix for the node (see Leaf class).
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000113
114 This must be implemented by the concrete subclass.
115 """
116 raise NotImplementedError
117
118 def get_prefix(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000119 """
120 Return the prefix for the node (see Leaf class).
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000121
122 This must be implemented by the concrete subclass.
123 """
124 raise NotImplementedError
125
126 def replace(self, new):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000127 """Replace this node with a new one in the parent."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000128 assert self.parent is not None, str(self)
129 assert new is not None
130 if not isinstance(new, list):
131 new = [new]
132 l_children = []
133 found = False
134 for ch in self.parent.children:
135 if ch is self:
136 assert not found, (self.parent.children, self, new)
137 if new is not None:
138 l_children.extend(new)
139 found = True
140 else:
141 l_children.append(ch)
142 assert found, (self.children, self, new)
143 self.parent.changed()
144 self.parent.children = l_children
145 for x in new:
146 x.parent = self.parent
147 self.parent = None
148
149 def get_lineno(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000150 """Return the line number which generated the invocant node."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000151 node = self
152 while not isinstance(node, Leaf):
153 if not node.children:
154 return
155 node = node.children[0]
156 return node.lineno
157
158 def changed(self):
159 if self.parent:
160 self.parent.changed()
161 self.was_changed = True
162
163 def remove(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000164 """
165 Remove the node from the tree. Returns the position of the node in its
166 parent's children before it was removed.
167 """
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000168 if self.parent:
169 for i, node in enumerate(self.parent.children):
170 if node is self:
171 self.parent.changed()
172 del self.parent.children[i]
173 self.parent = None
174 return i
175
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000176 @property
177 def next_sibling(self):
178 """
179 The node immediately following the invocant in their parent's children
180 list. If the invocant does not have a next sibling, it is None
181 """
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000182 if self.parent is None:
183 return None
184
185 # Can't use index(); we need to test by identity
Martin v. Löwis60a819d2008-04-10 02:48:01 +0000186 for i, child in enumerate(self.parent.children):
187 if child is self:
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000188 try:
189 return self.parent.children[i+1]
190 except IndexError:
191 return None
192
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000193 @property
194 def prev_sibling(self):
195 """
196 The node immediately preceding the invocant in their parent's children
197 list. If the invocant does not have a previous sibling, it is None.
198 """
Martin v. Löwis60a819d2008-04-10 02:48:01 +0000199 if self.parent is None:
200 return None
201
202 # Can't use index(); we need to test by identity
203 for i, child in enumerate(self.parent.children):
204 if child is self:
205 if i == 0:
206 return None
207 return self.parent.children[i-1]
208
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000209 def get_suffix(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000210 """
211 Return the string immediately following the invocant node. This is
212 effectively equivalent to node.next_sibling.get_prefix()
213 """
214 next_sib = self.next_sibling
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000215 if next_sib is None:
216 return ""
217 return next_sib.get_prefix()
218
219
220class Node(Base):
221
222 """Concrete implementation for interior nodes."""
223
224 def __init__(self, type, children, context=None, prefix=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000225 """
226 Initializer.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000227
228 Takes a type constant (a symbol number >= 256), a sequence of
229 child nodes, and an optional context keyword argument.
230
231 As a side effect, the parent pointers of the children are updated.
232 """
233 assert type >= 256, type
234 self.type = type
235 self.children = list(children)
236 for ch in self.children:
237 assert ch.parent is None, repr(ch)
238 ch.parent = self
239 if prefix is not None:
240 self.set_prefix(prefix)
241
242 def __repr__(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000243 """Return a canonical string representation."""
Martin v. Löwisbaf267c2008-03-22 00:01:12 +0000244 return "%s(%s, %r)" % (self.__class__.__name__,
245 type_repr(self.type),
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000246 self.children)
247
248 def __str__(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000249 """
250 Return a pretty string representation.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000251
252 This reproduces the input source exactly.
253 """
254 return "".join(map(str, self.children))
255
256 def _eq(self, other):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000257 """Compare two nodes for equality."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000258 return (self.type, self.children) == (other.type, other.children)
259
260 def clone(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000261 """Return a cloned (deep) copy of self."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000262 return Node(self.type, [ch.clone() for ch in self.children])
263
264 def post_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000265 """Return a post-order iterator for the tree."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000266 for child in self.children:
267 for node in child.post_order():
268 yield node
269 yield self
270
271 def pre_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000272 """Return a pre-order iterator for the tree."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000273 yield self
274 for child in self.children:
275 for node in child.post_order():
276 yield node
277
278 def set_prefix(self, prefix):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000279 """
280 Set the prefix for the node.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000281
282 This passes the responsibility on to the first child.
283 """
284 if self.children:
285 self.children[0].set_prefix(prefix)
286
287 def get_prefix(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000288 """
289 Return the prefix for the node.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000290
291 This passes the call on to the first child.
292 """
293 if not self.children:
294 return ""
295 return self.children[0].get_prefix()
296
297 def set_child(self, i, child):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000298 """
299 Equivalent to 'node.children[i] = child'. This method also sets the
300 child's parent attribute appropriately.
301 """
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000302 child.parent = self
303 self.children[i].parent = None
304 self.children[i] = child
Benjamin Peterson43caaa02008-12-16 03:35:28 +0000305 self.changed()
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000306
307 def insert_child(self, i, child):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000308 """
309 Equivalent to 'node.children.insert(i, child)'. This method also sets
310 the child's parent attribute appropriately.
311 """
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000312 child.parent = self
313 self.children.insert(i, child)
Benjamin Peterson43caaa02008-12-16 03:35:28 +0000314 self.changed()
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000315
316 def append_child(self, child):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000317 """
318 Equivalent to 'node.children.append(child)'. This method also sets the
319 child's parent attribute appropriately.
320 """
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000321 child.parent = self
322 self.children.append(child)
Benjamin Peterson43caaa02008-12-16 03:35:28 +0000323 self.changed()
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000324
325
326class Leaf(Base):
327
328 """Concrete implementation for leaf nodes."""
329
330 # Default values for instance variables
331 prefix = "" # Whitespace and comments preceding this token in the input
332 lineno = 0 # Line where this token starts in the input
333 column = 0 # Column where this token tarts in the input
334
335 def __init__(self, type, value, context=None, prefix=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000336 """
337 Initializer.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000338
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000339 Takes a type constant (a token number < 256), a string value, and an
340 optional context keyword argument.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000341 """
342 assert 0 <= type < 256, type
343 if context is not None:
344 self.prefix, (self.lineno, self.column) = context
345 self.type = type
346 self.value = value
347 if prefix is not None:
348 self.prefix = prefix
349
350 def __repr__(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000351 """Return a canonical string representation."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000352 return "%s(%r, %r)" % (self.__class__.__name__,
353 self.type,
354 self.value)
355
356 def __str__(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000357 """
358 Return a pretty string representation.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000359
360 This reproduces the input source exactly.
361 """
362 return self.prefix + str(self.value)
363
364 def _eq(self, other):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000365 """Compare two nodes for equality."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000366 return (self.type, self.value) == (other.type, other.value)
367
368 def clone(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000369 """Return a cloned (deep) copy of self."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000370 return Leaf(self.type, self.value,
371 (self.prefix, (self.lineno, self.column)))
372
373 def post_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000374 """Return a post-order iterator for the tree."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000375 yield self
376
377 def pre_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000378 """Return a pre-order iterator for the tree."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000379 yield self
380
381 def set_prefix(self, prefix):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000382 """Set the prefix for the node."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000383 self.changed()
384 self.prefix = prefix
385
386 def get_prefix(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000387 """Return the prefix for the node."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000388 return self.prefix
389
390
391def convert(gr, raw_node):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000392 """
393 Convert raw node information to a Node or Leaf instance.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000394
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000395 This is passed to the parser driver which calls it whenever a reduction of a
396 grammar rule produces a new complete node, so that the tree is build
397 strictly bottom-up.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000398 """
399 type, value, context, children = raw_node
400 if children or type in gr.number2symbol:
401 # If there's exactly one child, return that child instead of
402 # creating a new node.
403 if len(children) == 1:
404 return children[0]
405 return Node(type, children, context=context)
406 else:
407 return Leaf(type, value, context=context)
408
409
410class BasePattern(object):
411
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000412 """
413 A pattern is a tree matching pattern.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000414
415 It looks for a specific node type (token or symbol), and
416 optionally for a specific content.
417
418 This is an abstract base class. There are three concrete
419 subclasses:
420
421 - LeafPattern matches a single leaf node;
422 - NodePattern matches a single node (usually non-leaf);
423 - WildcardPattern matches a sequence of nodes of variable length.
424 """
425
426 # Defaults for instance variables
427 type = None # Node type (token if < 256, symbol if >= 256)
428 content = None # Optional content matching pattern
429 name = None # Optional name used to store match in results dict
430
431 def __new__(cls, *args, **kwds):
432 """Constructor that prevents BasePattern from being instantiated."""
433 assert cls is not BasePattern, "Cannot instantiate BasePattern"
434 return object.__new__(cls)
435
436 def __repr__(self):
Martin v. Löwisbaf267c2008-03-22 00:01:12 +0000437 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000438 while args and args[-1] is None:
439 del args[-1]
440 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
441
442 def optimize(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000443 """
444 A subclass can define this as a hook for optimizations.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000445
446 Returns either self or another node with the same effect.
447 """
448 return self
449
450 def match(self, node, results=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000451 """
452 Does this pattern exactly match a node?
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000453
454 Returns True if it matches, False if not.
455
456 If results is not None, it must be a dict which will be
457 updated with the nodes matching named subpatterns.
458
459 Default implementation for non-wildcard patterns.
460 """
461 if self.type is not None and node.type != self.type:
462 return False
463 if self.content is not None:
464 r = None
465 if results is not None:
466 r = {}
467 if not self._submatch(node, r):
468 return False
469 if r:
470 results.update(r)
471 if results is not None and self.name:
472 results[self.name] = node
473 return True
474
475 def match_seq(self, nodes, results=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000476 """
477 Does this pattern exactly match a sequence of nodes?
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000478
479 Default implementation for non-wildcard patterns.
480 """
481 if len(nodes) != 1:
482 return False
483 return self.match(nodes[0], results)
484
485 def generate_matches(self, nodes):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000486 """
487 Generator yielding all matches for this pattern.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000488
489 Default implementation for non-wildcard patterns.
490 """
491 r = {}
492 if nodes and self.match(nodes[0], r):
493 yield 1, r
494
495
496class LeafPattern(BasePattern):
497
498 def __init__(self, type=None, content=None, name=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000499 """
500 Initializer. Takes optional type, content, and name.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000501
502 The type, if given must be a token type (< 256). If not given,
503 this matches any *leaf* node; the content may still be required.
504
505 The content, if given, must be a string.
506
507 If a name is given, the matching node is stored in the results
508 dict under that key.
509 """
510 if type is not None:
511 assert 0 <= type < 256, type
512 if content is not None:
513 assert isinstance(content, basestring), repr(content)
514 self.type = type
515 self.content = content
516 self.name = name
517
518 def match(self, node, results=None):
519 """Override match() to insist on a leaf node."""
520 if not isinstance(node, Leaf):
521 return False
522 return BasePattern.match(self, node, results)
523
524 def _submatch(self, node, results=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000525 """
526 Match the pattern's content to the node's children.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000527
528 This assumes the node type matches and self.content is not None.
529
530 Returns True if it matches, False if not.
531
532 If results is not None, it must be a dict which will be
533 updated with the nodes matching named subpatterns.
534
535 When returning False, the results dict may still be updated.
536 """
537 return self.content == node.value
538
539
540class NodePattern(BasePattern):
541
542 wildcards = False
543
544 def __init__(self, type=None, content=None, name=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000545 """
546 Initializer. Takes optional type, content, and name.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000547
548 The type, if given, must be a symbol type (>= 256). If the
549 type is None this matches *any* single node (leaf or not),
550 except if content is not None, in which it only matches
551 non-leaf nodes that also match the content pattern.
552
553 The content, if not None, must be a sequence of Patterns that
554 must match the node's children exactly. If the content is
555 given, the type must not be None.
556
557 If a name is given, the matching node is stored in the results
558 dict under that key.
559 """
560 if type is not None:
561 assert type >= 256, type
562 if content is not None:
563 assert not isinstance(content, basestring), repr(content)
564 content = list(content)
565 for i, item in enumerate(content):
566 assert isinstance(item, BasePattern), (i, item)
567 if isinstance(item, WildcardPattern):
568 self.wildcards = True
569 self.type = type
570 self.content = content
571 self.name = name
572
573 def _submatch(self, node, results=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000574 """
575 Match the pattern's content to the node's children.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000576
577 This assumes the node type matches and self.content is not None.
578
579 Returns True if it matches, False if not.
580
581 If results is not None, it must be a dict which will be
582 updated with the nodes matching named subpatterns.
583
584 When returning False, the results dict may still be updated.
585 """
586 if self.wildcards:
587 for c, r in generate_matches(self.content, node.children):
588 if c == len(node.children):
589 if results is not None:
590 results.update(r)
591 return True
592 return False
593 if len(self.content) != len(node.children):
594 return False
595 for subpattern, child in zip(self.content, node.children):
596 if not subpattern.match(child, results):
597 return False
598 return True
599
600
601class WildcardPattern(BasePattern):
602
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000603 """
604 A wildcard pattern can match zero or more nodes.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000605
606 This has all the flexibility needed to implement patterns like:
607
608 .* .+ .? .{m,n}
609 (a b c | d e | f)
610 (...)* (...)+ (...)? (...){m,n}
611
612 except it always uses non-greedy matching.
613 """
614
615 def __init__(self, content=None, min=0, max=HUGE, name=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000616 """
617 Initializer.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000618
619 Args:
620 content: optional sequence of subsequences of patterns;
621 if absent, matches one node;
622 if present, each subsequence is an alternative [*]
623 min: optinal minumum number of times to match, default 0
624 max: optional maximum number of times tro match, default HUGE
625 name: optional name assigned to this match
626
627 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
628 equivalent to (a b c | d e | f g h); if content is None,
629 this is equivalent to '.' in regular expression terms.
630 The min and max parameters work as follows:
631 min=0, max=maxint: .*
632 min=1, max=maxint: .+
633 min=0, max=1: .?
634 min=1, max=1: .
635 If content is not None, replace the dot with the parenthesized
636 list of alternatives, e.g. (a b c | d e | f g h)*
637 """
638 assert 0 <= min <= max <= HUGE, (min, max)
639 if content is not None:
640 content = tuple(map(tuple, content)) # Protect against alterations
641 # Check sanity of alternatives
642 assert len(content), repr(content) # Can't have zero alternatives
643 for alt in content:
644 assert len(alt), repr(alt) # Can have empty alternatives
645 self.content = content
646 self.min = min
647 self.max = max
648 self.name = name
649
650 def optimize(self):
651 """Optimize certain stacked wildcard patterns."""
652 subpattern = None
653 if (self.content is not None and
654 len(self.content) == 1 and len(self.content[0]) == 1):
655 subpattern = self.content[0][0]
656 if self.min == 1 and self.max == 1:
657 if self.content is None:
658 return NodePattern(name=self.name)
659 if subpattern is not None and self.name == subpattern.name:
660 return subpattern.optimize()
661 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
662 subpattern.min <= 1 and self.name == subpattern.name):
663 return WildcardPattern(subpattern.content,
664 self.min*subpattern.min,
665 self.max*subpattern.max,
666 subpattern.name)
667 return self
668
669 def match(self, node, results=None):
670 """Does this pattern exactly match a node?"""
671 return self.match_seq([node], results)
672
673 def match_seq(self, nodes, results=None):
674 """Does this pattern exactly match a sequence of nodes?"""
675 for c, r in self.generate_matches(nodes):
676 if c == len(nodes):
677 if results is not None:
678 results.update(r)
679 if self.name:
680 results[self.name] = list(nodes)
681 return True
682 return False
683
684 def generate_matches(self, nodes):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000685 """
686 Generator yielding matches for a sequence of nodes.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000687
688 Args:
689 nodes: sequence of nodes
690
691 Yields:
692 (count, results) tuples where:
693 count: the match comprises nodes[:count];
694 results: dict containing named submatches.
695 """
696 if self.content is None:
697 # Shortcut for special case (see __init__.__doc__)
698 for count in xrange(self.min, 1 + min(len(nodes), self.max)):
699 r = {}
700 if self.name:
701 r[self.name] = nodes[:count]
702 yield count, r
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000703 elif self.name == "bare_name":
704 yield self._bare_name_matches(nodes)
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000705 else:
Benjamin Peterson2555d9d2008-11-28 22:12:14 +0000706 # The reason for this is that hitting the recursion limit usually
707 # results in some ugly messages about how RuntimeErrors are being
708 # ignored.
709 save_stderr = sys.stderr
710 sys.stderr = StringIO()
Benjamin Peterson4d164152008-10-04 20:55:50 +0000711 try:
712 for count, r in self._recursive_matches(nodes, 0):
713 if self.name:
714 r[self.name] = nodes[:count]
715 yield count, r
716 except RuntimeError:
717 # We fall back to the iterative pattern matching scheme if the recursive
718 # scheme hits the recursion limit.
719 for count, r in self._iterative_matches(nodes):
720 if self.name:
721 r[self.name] = nodes[:count]
722 yield count, r
Benjamin Peterson2555d9d2008-11-28 22:12:14 +0000723 finally:
724 sys.stderr = save_stderr
Benjamin Peterson4d164152008-10-04 20:55:50 +0000725
726 def _iterative_matches(self, nodes):
727 """Helper to iteratively yield the matches."""
728 nodelen = len(nodes)
729 if 0 >= self.min:
730 yield 0, {}
731
732 results = []
733 # generate matches that use just one alt from self.content
734 for alt in self.content:
735 for c, r in generate_matches(alt, nodes):
736 yield c, r
737 results.append((c, r))
738
739 # for each match, iterate down the nodes
740 while results:
741 new_results = []
742 for c0, r0 in results:
743 # stop if the entire set of nodes has been matched
744 if c0 < nodelen and c0 <= self.max:
745 for alt in self.content:
746 for c1, r1 in generate_matches(alt, nodes[c0:]):
747 if c1 > 0:
748 r = {}
749 r.update(r0)
750 r.update(r1)
751 yield c0 + c1, r
752 new_results.append((c0 + c1, r))
753 results = new_results
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000754
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000755 def _bare_name_matches(self, nodes):
756 """Special optimized matcher for bare_name."""
757 count = 0
758 r = {}
759 done = False
760 max = len(nodes)
761 while not done and count < max:
762 done = True
763 for leaf in self.content:
764 if leaf[0].match(nodes[count], r):
765 count += 1
766 done = False
767 break
768 r[self.name] = nodes[:count]
769 return count, r
770
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000771 def _recursive_matches(self, nodes, count):
772 """Helper to recursively yield the matches."""
773 assert self.content is not None
774 if count >= self.min:
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000775 yield 0, {}
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000776 if count < self.max:
777 for alt in self.content:
778 for c0, r0 in generate_matches(alt, nodes):
779 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
780 r = {}
781 r.update(r0)
782 r.update(r1)
783 yield c0 + c1, r
784
785
786class NegatedPattern(BasePattern):
787
788 def __init__(self, content=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000789 """
790 Initializer.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000791
792 The argument is either a pattern or None. If it is None, this
793 only matches an empty sequence (effectively '$' in regex
794 lingo). If it is not None, this matches whenever the argument
795 pattern doesn't have any matches.
796 """
797 if content is not None:
798 assert isinstance(content, BasePattern), repr(content)
799 self.content = content
800
801 def match(self, node):
802 # We never match a node in its entirety
803 return False
804
805 def match_seq(self, nodes):
806 # We only match an empty sequence of nodes in its entirety
807 return len(nodes) == 0
808
809 def generate_matches(self, nodes):
810 if self.content is None:
811 # Return a match if there is an empty sequence
812 if len(nodes) == 0:
813 yield 0, {}
814 else:
815 # Return a match if the argument pattern has no matches
816 for c, r in self.content.generate_matches(nodes):
817 return
818 yield 0, {}
819
820
821def generate_matches(patterns, nodes):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000822 """
823 Generator yielding matches for a sequence of patterns and nodes.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000824
825 Args:
826 patterns: a sequence of patterns
827 nodes: a sequence of nodes
828
829 Yields:
830 (count, results) tuples where:
831 count: the entire sequence of patterns matches nodes[:count];
832 results: dict containing named submatches.
833 """
834 if not patterns:
835 yield 0, {}
836 else:
837 p, rest = patterns[0], patterns[1:]
838 for c0, r0 in p.generate_matches(nodes):
839 if not rest:
840 yield c0, r0
841 else:
842 for c1, r1 in generate_matches(rest, nodes[c0:]):
843 r = {}
844 r.update(r0)
845 r.update(r1)
846 yield c0 + c1, r