blob: c60f107614c223a04494f91d4706aff65fbb0e40 [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
Benjamin Peterson608d8bc2009-05-05 23:23:31 +00004"""
5Python parse tree definitions.
Martin v. Löwisef04c442008-03-19 05:04:44 +00006
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
Benjamin Peterson5cff9312008-11-28 23:01:28 +000015import sys
16from io import StringIO
17
Martin v. Löwisef04c442008-03-19 05:04:44 +000018
19HUGE = 0x7FFFFFFF # maximum repeat count, default max
20
Martin v. Löwis3faa84f2008-03-22 00:07:09 +000021_type_reprs = {}
22def type_repr(type_num):
23 global _type_reprs
24 if not _type_reprs:
25 from .pygram import python_symbols
26 # printing tokens is possible but not as useful
27 # from .pgen2 import token // token.__dict__.items():
28 for name, val in python_symbols.__dict__.items():
29 if type(val) == int: _type_reprs[val] = name
30 return _type_reprs.setdefault(type_num, type_num)
31
Martin v. Löwisef04c442008-03-19 05:04:44 +000032
33class Base(object):
34
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000035 """
36 Abstract base class for Node and Leaf.
Martin v. Löwisef04c442008-03-19 05:04:44 +000037
38 This provides some default functionality and boilerplate using the
39 template pattern.
40
41 A node may be a subnode of at most one parent.
42 """
43
44 # Default values for instance variables
45 type = None # int: token number (< 256) or symbol number (>= 256)
46 parent = None # Parent node pointer, or None
47 children = () # Tuple of subnodes
48 was_changed = False
49
50 def __new__(cls, *args, **kwds):
51 """Constructor that prevents Base from being instantiated."""
52 assert cls is not Base, "Cannot instantiate Base"
53 return object.__new__(cls)
54
55 def __eq__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000056 """
57 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000058
59 This calls the method _eq().
60 """
61 if self.__class__ is not other.__class__:
62 return NotImplemented
63 return self._eq(other)
64
65 def __ne__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000066 """
67 Compare two nodes for inequality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000068
69 This calls the method _eq().
70 """
71 if self.__class__ is not other.__class__:
72 return NotImplemented
73 return not self._eq(other)
74
75 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000076 """
77 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000078
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000079 This is called by __eq__ and __ne__. It is only called if the two nodes
80 have the same type. This must be implemented by the concrete subclass.
81 Nodes should be considered equal if they have the same structure,
82 ignoring the prefix string and other context information.
Martin v. Löwisef04c442008-03-19 05:04:44 +000083 """
84 raise NotImplementedError
85
86 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000087 """
88 Return a cloned (deep) copy of self.
Martin v. Löwisef04c442008-03-19 05:04:44 +000089
90 This must be implemented by the concrete subclass.
91 """
92 raise NotImplementedError
93
94 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000095 """
96 Return a post-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +000097
98 This must be implemented by the concrete subclass.
99 """
100 raise NotImplementedError
101
102 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000103 """
104 Return a pre-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000105
106 This must be implemented by the concrete subclass.
107 """
108 raise NotImplementedError
109
110 def set_prefix(self, prefix):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000111 """
112 Set the prefix for the node (see Leaf class).
Martin v. Löwisef04c442008-03-19 05:04:44 +0000113
114 This must be implemented by the concrete subclass.
115 """
116 raise NotImplementedError
117
118 def get_prefix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000119 """
120 Return the prefix for the node (see Leaf class).
Martin v. Löwisef04c442008-03-19 05:04:44 +0000121
122 This must be implemented by the concrete subclass.
123 """
124 raise NotImplementedError
125
126 def replace(self, new):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000127 """Replace this node with a new one in the parent."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000128 assert self.parent is not None, str(self)
129 assert new is not None
130 if not isinstance(new, list):
131 new = [new]
132 l_children = []
133 found = False
134 for ch in self.parent.children:
135 if ch is self:
136 assert not found, (self.parent.children, self, new)
137 if new is not None:
138 l_children.extend(new)
139 found = True
140 else:
141 l_children.append(ch)
142 assert found, (self.children, self, new)
143 self.parent.changed()
144 self.parent.children = l_children
145 for x in new:
146 x.parent = self.parent
147 self.parent = None
148
149 def get_lineno(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000150 """Return the line number which generated the invocant node."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000151 node = self
152 while not isinstance(node, Leaf):
153 if not node.children:
154 return
155 node = node.children[0]
156 return node.lineno
157
158 def changed(self):
159 if self.parent:
160 self.parent.changed()
161 self.was_changed = True
162
163 def remove(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000164 """
165 Remove the node from the tree. Returns the position of the node in its
166 parent's children before it was removed.
167 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000168 if self.parent:
169 for i, node in enumerate(self.parent.children):
170 if node is self:
171 self.parent.changed()
172 del self.parent.children[i]
173 self.parent = None
174 return i
175
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000176 @property
177 def next_sibling(self):
178 """
179 The node immediately following the invocant in their parent's children
180 list. If the invocant does not have a next sibling, it is None
181 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000182 if self.parent is None:
183 return None
184
185 # Can't use index(); we need to test by identity
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000186 for i, child in enumerate(self.parent.children):
187 if child is self:
Martin v. Löwisef04c442008-03-19 05:04:44 +0000188 try:
189 return self.parent.children[i+1]
190 except IndexError:
191 return None
192
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000193 @property
194 def prev_sibling(self):
195 """
196 The node immediately preceding the invocant in their parent's children
197 list. If the invocant does not have a previous sibling, it is None.
198 """
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000199 if self.parent is None:
200 return None
201
202 # Can't use index(); we need to test by identity
203 for i, child in enumerate(self.parent.children):
204 if child is self:
205 if i == 0:
206 return None
207 return self.parent.children[i-1]
208
Martin v. Löwisef04c442008-03-19 05:04:44 +0000209 def get_suffix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000210 """
211 Return the string immediately following the invocant node. This is
212 effectively equivalent to node.next_sibling.get_prefix()
213 """
214 next_sib = self.next_sibling
Martin v. Löwisef04c442008-03-19 05:04:44 +0000215 if next_sib is None:
216 return ""
217 return next_sib.get_prefix()
218
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000219 if sys.version_info < (3, 0):
220 def __str__(self):
221 return str(self).encode("ascii")
222
Martin v. Löwisef04c442008-03-19 05:04:44 +0000223
224class Node(Base):
225
226 """Concrete implementation for interior nodes."""
227
228 def __init__(self, type, children, context=None, prefix=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000229 """
230 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000231
232 Takes a type constant (a symbol number >= 256), a sequence of
233 child nodes, and an optional context keyword argument.
234
235 As a side effect, the parent pointers of the children are updated.
236 """
237 assert type >= 256, type
238 self.type = type
239 self.children = list(children)
240 for ch in self.children:
241 assert ch.parent is None, repr(ch)
242 ch.parent = self
243 if prefix is not None:
244 self.set_prefix(prefix)
245
246 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000247 """Return a canonical string representation."""
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000248 return "%s(%s, %r)" % (self.__class__.__name__,
249 type_repr(self.type),
Martin v. Löwisef04c442008-03-19 05:04:44 +0000250 self.children)
251
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000252 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000253 """
254 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000255
256 This reproduces the input source exactly.
257 """
258 return "".join(map(str, self.children))
259
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000260 if sys.version_info > (3, 0):
261 __str__ = __unicode__
262
Martin v. Löwisef04c442008-03-19 05:04:44 +0000263 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000264 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000265 return (self.type, self.children) == (other.type, other.children)
266
267 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000268 """Return a cloned (deep) copy of self."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000269 return Node(self.type, [ch.clone() for ch in self.children])
270
271 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000272 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000273 for child in self.children:
274 for node in child.post_order():
275 yield node
276 yield self
277
278 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000279 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000280 yield self
281 for child in self.children:
282 for node in child.post_order():
283 yield node
284
285 def set_prefix(self, prefix):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000286 """
287 Set the prefix for the node.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000288
289 This passes the responsibility on to the first child.
290 """
291 if self.children:
292 self.children[0].set_prefix(prefix)
293
294 def get_prefix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000295 """
296 Return the prefix for the node.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000297
298 This passes the call on to the first child.
299 """
300 if not self.children:
301 return ""
302 return self.children[0].get_prefix()
303
304 def set_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000305 """
306 Equivalent to 'node.children[i] = child'. This method also sets the
307 child's parent attribute appropriately.
308 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000309 child.parent = self
310 self.children[i].parent = None
311 self.children[i] = child
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000312 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000313
314 def insert_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000315 """
316 Equivalent to 'node.children.insert(i, child)'. This method also sets
317 the child's parent attribute appropriately.
318 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000319 child.parent = self
320 self.children.insert(i, child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000321 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000322
323 def append_child(self, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000324 """
325 Equivalent to 'node.children.append(child)'. This method also sets the
326 child's parent attribute appropriately.
327 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000328 child.parent = self
329 self.children.append(child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000330 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000331
332
333class Leaf(Base):
334
335 """Concrete implementation for leaf nodes."""
336
337 # Default values for instance variables
338 prefix = "" # Whitespace and comments preceding this token in the input
339 lineno = 0 # Line where this token starts in the input
340 column = 0 # Column where this token tarts in the input
341
342 def __init__(self, type, value, context=None, prefix=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000343 """
344 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000345
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000346 Takes a type constant (a token number < 256), a string value, and an
347 optional context keyword argument.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000348 """
349 assert 0 <= type < 256, type
350 if context is not None:
351 self.prefix, (self.lineno, self.column) = context
352 self.type = type
353 self.value = value
354 if prefix is not None:
355 self.prefix = prefix
356
357 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000358 """Return a canonical string representation."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000359 return "%s(%r, %r)" % (self.__class__.__name__,
360 self.type,
361 self.value)
362
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000363 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000364 """
365 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000366
367 This reproduces the input source exactly.
368 """
369 return self.prefix + str(self.value)
370
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000371 if sys.version_info > (3, 0):
372 __str__ = __unicode__
373
Martin v. Löwisef04c442008-03-19 05:04:44 +0000374 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000375 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000376 return (self.type, self.value) == (other.type, other.value)
377
378 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000379 """Return a cloned (deep) copy of self."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000380 return Leaf(self.type, self.value,
381 (self.prefix, (self.lineno, self.column)))
382
383 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000384 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000385 yield self
386
387 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000388 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000389 yield self
390
391 def set_prefix(self, prefix):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000392 """Set the prefix for the node."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000393 self.changed()
394 self.prefix = prefix
395
396 def get_prefix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000397 """Return the prefix for the node."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000398 return self.prefix
399
400
401def convert(gr, raw_node):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000402 """
403 Convert raw node information to a Node or Leaf instance.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000404
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000405 This is passed to the parser driver which calls it whenever a reduction of a
406 grammar rule produces a new complete node, so that the tree is build
407 strictly bottom-up.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000408 """
409 type, value, context, children = raw_node
410 if children or type in gr.number2symbol:
411 # If there's exactly one child, return that child instead of
412 # creating a new node.
413 if len(children) == 1:
414 return children[0]
415 return Node(type, children, context=context)
416 else:
417 return Leaf(type, value, context=context)
418
419
420class BasePattern(object):
421
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000422 """
423 A pattern is a tree matching pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000424
425 It looks for a specific node type (token or symbol), and
426 optionally for a specific content.
427
428 This is an abstract base class. There are three concrete
429 subclasses:
430
431 - LeafPattern matches a single leaf node;
432 - NodePattern matches a single node (usually non-leaf);
433 - WildcardPattern matches a sequence of nodes of variable length.
434 """
435
436 # Defaults for instance variables
437 type = None # Node type (token if < 256, symbol if >= 256)
438 content = None # Optional content matching pattern
439 name = None # Optional name used to store match in results dict
440
441 def __new__(cls, *args, **kwds):
442 """Constructor that prevents BasePattern from being instantiated."""
443 assert cls is not BasePattern, "Cannot instantiate BasePattern"
444 return object.__new__(cls)
445
446 def __repr__(self):
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000447 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000448 while args and args[-1] is None:
449 del args[-1]
450 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
451
452 def optimize(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000453 """
454 A subclass can define this as a hook for optimizations.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000455
456 Returns either self or another node with the same effect.
457 """
458 return self
459
460 def match(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000461 """
462 Does this pattern exactly match a node?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000463
464 Returns True if it matches, False if not.
465
466 If results is not None, it must be a dict which will be
467 updated with the nodes matching named subpatterns.
468
469 Default implementation for non-wildcard patterns.
470 """
471 if self.type is not None and node.type != self.type:
472 return False
473 if self.content is not None:
474 r = None
475 if results is not None:
476 r = {}
477 if not self._submatch(node, r):
478 return False
479 if r:
480 results.update(r)
481 if results is not None and self.name:
482 results[self.name] = node
483 return True
484
485 def match_seq(self, nodes, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000486 """
487 Does this pattern exactly match a sequence of nodes?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000488
489 Default implementation for non-wildcard patterns.
490 """
491 if len(nodes) != 1:
492 return False
493 return self.match(nodes[0], results)
494
495 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000496 """
497 Generator yielding all matches for this pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000498
499 Default implementation for non-wildcard patterns.
500 """
501 r = {}
502 if nodes and self.match(nodes[0], r):
503 yield 1, r
504
505
506class LeafPattern(BasePattern):
507
508 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000509 """
510 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000511
512 The type, if given must be a token type (< 256). If not given,
513 this matches any *leaf* node; the content may still be required.
514
515 The content, if given, must be a string.
516
517 If a name is given, the matching node is stored in the results
518 dict under that key.
519 """
520 if type is not None:
521 assert 0 <= type < 256, type
522 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000523 assert isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000524 self.type = type
525 self.content = content
526 self.name = name
527
528 def match(self, node, results=None):
529 """Override match() to insist on a leaf node."""
530 if not isinstance(node, Leaf):
531 return False
532 return BasePattern.match(self, node, results)
533
534 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000535 """
536 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000537
538 This assumes the node type matches and self.content is not None.
539
540 Returns True if it matches, False if not.
541
542 If results is not None, it must be a dict which will be
543 updated with the nodes matching named subpatterns.
544
545 When returning False, the results dict may still be updated.
546 """
547 return self.content == node.value
548
549
550class NodePattern(BasePattern):
551
552 wildcards = False
553
554 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000555 """
556 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000557
558 The type, if given, must be a symbol type (>= 256). If the
559 type is None this matches *any* single node (leaf or not),
560 except if content is not None, in which it only matches
561 non-leaf nodes that also match the content pattern.
562
563 The content, if not None, must be a sequence of Patterns that
564 must match the node's children exactly. If the content is
565 given, the type must not be None.
566
567 If a name is given, the matching node is stored in the results
568 dict under that key.
569 """
570 if type is not None:
571 assert type >= 256, type
572 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000573 assert not isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000574 content = list(content)
575 for i, item in enumerate(content):
576 assert isinstance(item, BasePattern), (i, item)
577 if isinstance(item, WildcardPattern):
578 self.wildcards = True
579 self.type = type
580 self.content = content
581 self.name = name
582
583 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000584 """
585 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000586
587 This assumes the node type matches and self.content is not None.
588
589 Returns True if it matches, False if not.
590
591 If results is not None, it must be a dict which will be
592 updated with the nodes matching named subpatterns.
593
594 When returning False, the results dict may still be updated.
595 """
596 if self.wildcards:
597 for c, r in generate_matches(self.content, node.children):
598 if c == len(node.children):
599 if results is not None:
600 results.update(r)
601 return True
602 return False
603 if len(self.content) != len(node.children):
604 return False
605 for subpattern, child in zip(self.content, node.children):
606 if not subpattern.match(child, results):
607 return False
608 return True
609
610
611class WildcardPattern(BasePattern):
612
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000613 """
614 A wildcard pattern can match zero or more nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000615
616 This has all the flexibility needed to implement patterns like:
617
618 .* .+ .? .{m,n}
619 (a b c | d e | f)
620 (...)* (...)+ (...)? (...){m,n}
621
622 except it always uses non-greedy matching.
623 """
624
625 def __init__(self, content=None, min=0, max=HUGE, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000626 """
627 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000628
629 Args:
630 content: optional sequence of subsequences of patterns;
631 if absent, matches one node;
632 if present, each subsequence is an alternative [*]
633 min: optinal minumum number of times to match, default 0
634 max: optional maximum number of times tro match, default HUGE
635 name: optional name assigned to this match
636
637 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
638 equivalent to (a b c | d e | f g h); if content is None,
639 this is equivalent to '.' in regular expression terms.
640 The min and max parameters work as follows:
641 min=0, max=maxint: .*
642 min=1, max=maxint: .+
643 min=0, max=1: .?
644 min=1, max=1: .
645 If content is not None, replace the dot with the parenthesized
646 list of alternatives, e.g. (a b c | d e | f g h)*
647 """
648 assert 0 <= min <= max <= HUGE, (min, max)
649 if content is not None:
650 content = tuple(map(tuple, content)) # Protect against alterations
651 # Check sanity of alternatives
652 assert len(content), repr(content) # Can't have zero alternatives
653 for alt in content:
654 assert len(alt), repr(alt) # Can have empty alternatives
655 self.content = content
656 self.min = min
657 self.max = max
658 self.name = name
659
660 def optimize(self):
661 """Optimize certain stacked wildcard patterns."""
662 subpattern = None
663 if (self.content is not None and
664 len(self.content) == 1 and len(self.content[0]) == 1):
665 subpattern = self.content[0][0]
666 if self.min == 1 and self.max == 1:
667 if self.content is None:
668 return NodePattern(name=self.name)
669 if subpattern is not None and self.name == subpattern.name:
670 return subpattern.optimize()
671 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
672 subpattern.min <= 1 and self.name == subpattern.name):
673 return WildcardPattern(subpattern.content,
674 self.min*subpattern.min,
675 self.max*subpattern.max,
676 subpattern.name)
677 return self
678
679 def match(self, node, results=None):
680 """Does this pattern exactly match a node?"""
681 return self.match_seq([node], results)
682
683 def match_seq(self, nodes, results=None):
684 """Does this pattern exactly match a sequence of nodes?"""
685 for c, r in self.generate_matches(nodes):
686 if c == len(nodes):
687 if results is not None:
688 results.update(r)
689 if self.name:
690 results[self.name] = list(nodes)
691 return True
692 return False
693
694 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000695 """
696 Generator yielding matches for a sequence of nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000697
698 Args:
699 nodes: sequence of nodes
700
701 Yields:
702 (count, results) tuples where:
703 count: the match comprises nodes[:count];
704 results: dict containing named submatches.
705 """
706 if self.content is None:
707 # Shortcut for special case (see __init__.__doc__)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000708 for count in range(self.min, 1 + min(len(nodes), self.max)):
Martin v. Löwisef04c442008-03-19 05:04:44 +0000709 r = {}
710 if self.name:
711 r[self.name] = nodes[:count]
712 yield count, r
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000713 elif self.name == "bare_name":
714 yield self._bare_name_matches(nodes)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000715 else:
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000716 # The reason for this is that hitting the recursion limit usually
717 # results in some ugly messages about how RuntimeErrors are being
718 # ignored.
719 save_stderr = sys.stderr
720 sys.stderr = StringIO()
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000721 try:
722 for count, r in self._recursive_matches(nodes, 0):
723 if self.name:
724 r[self.name] = nodes[:count]
725 yield count, r
726 except RuntimeError:
727 # We fall back to the iterative pattern matching scheme if the recursive
728 # scheme hits the recursion limit.
729 for count, r in self._iterative_matches(nodes):
730 if self.name:
731 r[self.name] = nodes[:count]
732 yield count, r
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000733 finally:
734 sys.stderr = save_stderr
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000735
736 def _iterative_matches(self, nodes):
737 """Helper to iteratively yield the matches."""
738 nodelen = len(nodes)
739 if 0 >= self.min:
740 yield 0, {}
741
742 results = []
743 # generate matches that use just one alt from self.content
744 for alt in self.content:
745 for c, r in generate_matches(alt, nodes):
746 yield c, r
747 results.append((c, r))
748
749 # for each match, iterate down the nodes
750 while results:
751 new_results = []
752 for c0, r0 in results:
753 # stop if the entire set of nodes has been matched
754 if c0 < nodelen and c0 <= self.max:
755 for alt in self.content:
756 for c1, r1 in generate_matches(alt, nodes[c0:]):
757 if c1 > 0:
758 r = {}
759 r.update(r0)
760 r.update(r1)
761 yield c0 + c1, r
762 new_results.append((c0 + c1, r))
763 results = new_results
Martin v. Löwisef04c442008-03-19 05:04:44 +0000764
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000765 def _bare_name_matches(self, nodes):
766 """Special optimized matcher for bare_name."""
767 count = 0
768 r = {}
769 done = False
770 max = len(nodes)
771 while not done and count < max:
772 done = True
773 for leaf in self.content:
774 if leaf[0].match(nodes[count], r):
775 count += 1
776 done = False
777 break
778 r[self.name] = nodes[:count]
779 return count, r
780
Martin v. Löwisef04c442008-03-19 05:04:44 +0000781 def _recursive_matches(self, nodes, count):
782 """Helper to recursively yield the matches."""
783 assert self.content is not None
784 if count >= self.min:
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000785 yield 0, {}
Martin v. Löwisef04c442008-03-19 05:04:44 +0000786 if count < self.max:
787 for alt in self.content:
788 for c0, r0 in generate_matches(alt, nodes):
789 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
790 r = {}
791 r.update(r0)
792 r.update(r1)
793 yield c0 + c1, r
794
795
796class NegatedPattern(BasePattern):
797
798 def __init__(self, content=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000799 """
800 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000801
802 The argument is either a pattern or None. If it is None, this
803 only matches an empty sequence (effectively '$' in regex
804 lingo). If it is not None, this matches whenever the argument
805 pattern doesn't have any matches.
806 """
807 if content is not None:
808 assert isinstance(content, BasePattern), repr(content)
809 self.content = content
810
811 def match(self, node):
812 # We never match a node in its entirety
813 return False
814
815 def match_seq(self, nodes):
816 # We only match an empty sequence of nodes in its entirety
817 return len(nodes) == 0
818
819 def generate_matches(self, nodes):
820 if self.content is None:
821 # Return a match if there is an empty sequence
822 if len(nodes) == 0:
823 yield 0, {}
824 else:
825 # Return a match if the argument pattern has no matches
826 for c, r in self.content.generate_matches(nodes):
827 return
828 yield 0, {}
829
830
831def generate_matches(patterns, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000832 """
833 Generator yielding matches for a sequence of patterns and nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000834
835 Args:
836 patterns: a sequence of patterns
837 nodes: a sequence of nodes
838
839 Yields:
840 (count, results) tuples where:
841 count: the entire sequence of patterns matches nodes[:count];
842 results: dict containing named submatches.
843 """
844 if not patterns:
845 yield 0, {}
846 else:
847 p, rest = patterns[0], patterns[1:]
848 for c0, r0 in p.generate_matches(nodes):
849 if not rest:
850 yield c0, r0
851 else:
852 for c1, r1 in generate_matches(rest, nodes[c0:]):
853 r = {}
854 r.update(r0)
855 r.update(r1)
856 yield c0 + c1, r