blob: 3daab16d60ba6a4b78aeb793ed7fd3e1a58649fd [file] [log] [blame]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +00004"""
5Python parse tree definitions.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +00006
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
Benjamin Peterson2555d9d2008-11-28 22:12:14 +000015import sys
Benjamin Peterson61180402009-06-11 22:06:46 +000016import warnings
Benjamin Peterson2555d9d2008-11-28 22:12:14 +000017from StringIO import StringIO
18
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000019
20HUGE = 0x7FFFFFFF # maximum repeat count, default max
21
Martin v. Löwisbaf267c2008-03-22 00:01:12 +000022_type_reprs = {}
23def type_repr(type_num):
24 global _type_reprs
25 if not _type_reprs:
26 from .pygram import python_symbols
27 # printing tokens is possible but not as useful
28 # from .pgen2 import token // token.__dict__.items():
29 for name, val in python_symbols.__dict__.items():
30 if type(val) == int: _type_reprs[val] = name
31 return _type_reprs.setdefault(type_num, type_num)
32
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000033
34class Base(object):
35
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000036 """
37 Abstract base class for Node and Leaf.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000038
39 This provides some default functionality and boilerplate using the
40 template pattern.
41
42 A node may be a subnode of at most one parent.
43 """
44
45 # Default values for instance variables
46 type = None # int: token number (< 256) or symbol number (>= 256)
47 parent = None # Parent node pointer, or None
48 children = () # Tuple of subnodes
49 was_changed = False
50
51 def __new__(cls, *args, **kwds):
52 """Constructor that prevents Base from being instantiated."""
53 assert cls is not Base, "Cannot instantiate Base"
54 return object.__new__(cls)
55
56 def __eq__(self, other):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000057 """
58 Compare two nodes for equality.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000059
60 This calls the method _eq().
61 """
62 if self.__class__ is not other.__class__:
63 return NotImplemented
64 return self._eq(other)
65
66 def __ne__(self, other):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000067 """
68 Compare two nodes for inequality.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000069
70 This calls the method _eq().
71 """
72 if self.__class__ is not other.__class__:
73 return NotImplemented
74 return not self._eq(other)
75
76 def _eq(self, other):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000077 """
78 Compare two nodes for equality.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000079
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000080 This is called by __eq__ and __ne__. It is only called if the two nodes
81 have the same type. This must be implemented by the concrete subclass.
82 Nodes should be considered equal if they have the same structure,
83 ignoring the prefix string and other context information.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000084 """
85 raise NotImplementedError
86
87 def clone(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000088 """
89 Return a cloned (deep) copy of self.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000090
91 This must be implemented by the concrete subclass.
92 """
93 raise NotImplementedError
94
95 def post_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +000096 """
97 Return a post-order iterator for the tree.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000098
99 This must be implemented by the concrete subclass.
100 """
101 raise NotImplementedError
102
103 def pre_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000104 """
105 Return a pre-order iterator for the tree.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000106
107 This must be implemented by the concrete subclass.
108 """
109 raise NotImplementedError
110
111 def set_prefix(self, prefix):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000112 """
113 Set the prefix for the node (see Leaf class).
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000114
Benjamin Peterson61180402009-06-11 22:06:46 +0000115 DEPRECATED; use the prefix property directly.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000116 """
Benjamin Peterson61180402009-06-11 22:06:46 +0000117 warnings.warn("set_prefix() is deprecated; use the prefix property",
118 DeprecationWarning, stacklevel=2)
119 self.prefix = prefix
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000120
121 def get_prefix(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000122 """
123 Return the prefix for the node (see Leaf class).
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000124
Benjamin Peterson61180402009-06-11 22:06:46 +0000125 DEPRECATED; use the prefix property directly.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000126 """
Benjamin Peterson61180402009-06-11 22:06:46 +0000127 warnings.warn("get_prefix() is deprecated; use the prefix property",
128 DeprecationWarning, stacklevel=2)
129 return self.prefix
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000130
131 def replace(self, new):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000132 """Replace this node with a new one in the parent."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000133 assert self.parent is not None, str(self)
134 assert new is not None
135 if not isinstance(new, list):
136 new = [new]
137 l_children = []
138 found = False
139 for ch in self.parent.children:
140 if ch is self:
141 assert not found, (self.parent.children, self, new)
142 if new is not None:
143 l_children.extend(new)
144 found = True
145 else:
146 l_children.append(ch)
147 assert found, (self.children, self, new)
148 self.parent.changed()
149 self.parent.children = l_children
150 for x in new:
151 x.parent = self.parent
152 self.parent = None
153
154 def get_lineno(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000155 """Return the line number which generated the invocant node."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000156 node = self
157 while not isinstance(node, Leaf):
158 if not node.children:
159 return
160 node = node.children[0]
161 return node.lineno
162
163 def changed(self):
164 if self.parent:
165 self.parent.changed()
166 self.was_changed = True
167
168 def remove(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000169 """
170 Remove the node from the tree. Returns the position of the node in its
171 parent's children before it was removed.
172 """
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000173 if self.parent:
174 for i, node in enumerate(self.parent.children):
175 if node is self:
176 self.parent.changed()
177 del self.parent.children[i]
178 self.parent = None
179 return i
180
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000181 @property
182 def next_sibling(self):
183 """
184 The node immediately following the invocant in their parent's children
185 list. If the invocant does not have a next sibling, it is None
186 """
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000187 if self.parent is None:
188 return None
189
190 # Can't use index(); we need to test by identity
Martin v. Löwis60a819d2008-04-10 02:48:01 +0000191 for i, child in enumerate(self.parent.children):
192 if child is self:
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000193 try:
194 return self.parent.children[i+1]
195 except IndexError:
196 return None
197
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000198 @property
199 def prev_sibling(self):
200 """
201 The node immediately preceding the invocant in their parent's children
202 list. If the invocant does not have a previous sibling, it is None.
203 """
Martin v. Löwis60a819d2008-04-10 02:48:01 +0000204 if self.parent is None:
205 return None
206
207 # Can't use index(); we need to test by identity
208 for i, child in enumerate(self.parent.children):
209 if child is self:
210 if i == 0:
211 return None
212 return self.parent.children[i-1]
213
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000214 def get_suffix(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000215 """
216 Return the string immediately following the invocant node. This is
Benjamin Peterson61180402009-06-11 22:06:46 +0000217 effectively equivalent to node.next_sibling.prefix
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000218 """
219 next_sib = self.next_sibling
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000220 if next_sib is None:
Benjamin Peterson84ad84e2009-05-09 01:01:14 +0000221 return u""
Benjamin Peterson61180402009-06-11 22:06:46 +0000222 return next_sib.prefix
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000223
Benjamin Peterson84ad84e2009-05-09 01:01:14 +0000224 if sys.version_info < (3, 0):
225 def __str__(self):
226 return unicode(self).encode("ascii")
227
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000228
229class Node(Base):
230
231 """Concrete implementation for interior nodes."""
232
233 def __init__(self, type, children, context=None, prefix=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000234 """
235 Initializer.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000236
237 Takes a type constant (a symbol number >= 256), a sequence of
238 child nodes, and an optional context keyword argument.
239
240 As a side effect, the parent pointers of the children are updated.
241 """
242 assert type >= 256, type
243 self.type = type
244 self.children = list(children)
245 for ch in self.children:
246 assert ch.parent is None, repr(ch)
247 ch.parent = self
248 if prefix is not None:
Benjamin Peterson61180402009-06-11 22:06:46 +0000249 self.prefix = prefix
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000250
251 def __repr__(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000252 """Return a canonical string representation."""
Martin v. Löwisbaf267c2008-03-22 00:01:12 +0000253 return "%s(%s, %r)" % (self.__class__.__name__,
254 type_repr(self.type),
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000255 self.children)
256
Benjamin Peterson84ad84e2009-05-09 01:01:14 +0000257 def __unicode__(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000258 """
259 Return a pretty string representation.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000260
261 This reproduces the input source exactly.
262 """
Benjamin Peterson84ad84e2009-05-09 01:01:14 +0000263 return u"".join(map(unicode, self.children))
264
265 if sys.version_info > (3, 0):
266 __str__ = __unicode__
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000267
268 def _eq(self, other):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000269 """Compare two nodes for equality."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000270 return (self.type, self.children) == (other.type, other.children)
271
272 def clone(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000273 """Return a cloned (deep) copy of self."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000274 return Node(self.type, [ch.clone() for ch in self.children])
275
276 def post_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000277 """Return a post-order iterator for the tree."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000278 for child in self.children:
279 for node in child.post_order():
280 yield node
281 yield self
282
283 def pre_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000284 """Return a pre-order iterator for the tree."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000285 yield self
286 for child in self.children:
287 for node in child.post_order():
288 yield node
289
Benjamin Peterson61180402009-06-11 22:06:46 +0000290 @property
291 def prefix(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000292 """
Benjamin Peterson61180402009-06-11 22:06:46 +0000293 The whitespace and comments preceding this node in the input.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000294 """
295 if not self.children:
296 return ""
Benjamin Peterson61180402009-06-11 22:06:46 +0000297 return self.children[0].prefix
298
299 @prefix.setter
300 def prefix(self, prefix):
301 if self.children:
302 self.children[0].prefix = prefix
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000303
304 def set_child(self, i, child):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000305 """
306 Equivalent to 'node.children[i] = child'. This method also sets the
307 child's parent attribute appropriately.
308 """
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000309 child.parent = self
310 self.children[i].parent = None
311 self.children[i] = child
Benjamin Peterson43caaa02008-12-16 03:35:28 +0000312 self.changed()
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000313
314 def insert_child(self, i, child):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000315 """
316 Equivalent to 'node.children.insert(i, child)'. This method also sets
317 the child's parent attribute appropriately.
318 """
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000319 child.parent = self
320 self.children.insert(i, child)
Benjamin Peterson43caaa02008-12-16 03:35:28 +0000321 self.changed()
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000322
323 def append_child(self, child):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000324 """
325 Equivalent to 'node.children.append(child)'. This method also sets the
326 child's parent attribute appropriately.
327 """
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000328 child.parent = self
329 self.children.append(child)
Benjamin Peterson43caaa02008-12-16 03:35:28 +0000330 self.changed()
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000331
332
333class Leaf(Base):
334
335 """Concrete implementation for leaf nodes."""
336
337 # Default values for instance variables
Benjamin Peterson61180402009-06-11 22:06:46 +0000338 _prefix = "" # Whitespace and comments preceding this token in the input
339 lineno = 0 # Line where this token starts in the input
340 column = 0 # Column where this token tarts in the input
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000341
342 def __init__(self, type, value, context=None, prefix=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000343 """
344 Initializer.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000345
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000346 Takes a type constant (a token number < 256), a string value, and an
347 optional context keyword argument.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000348 """
349 assert 0 <= type < 256, type
350 if context is not None:
Benjamin Peterson61180402009-06-11 22:06:46 +0000351 self._prefix, (self.lineno, self.column) = context
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000352 self.type = type
353 self.value = value
354 if prefix is not None:
Benjamin Peterson61180402009-06-11 22:06:46 +0000355 self._prefix = prefix
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000356
357 def __repr__(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000358 """Return a canonical string representation."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000359 return "%s(%r, %r)" % (self.__class__.__name__,
360 self.type,
361 self.value)
362
Benjamin Peterson84ad84e2009-05-09 01:01:14 +0000363 def __unicode__(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000364 """
365 Return a pretty string representation.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000366
367 This reproduces the input source exactly.
368 """
Benjamin Peterson84ad84e2009-05-09 01:01:14 +0000369 return self.prefix + unicode(self.value)
370
371 if sys.version_info > (3, 0):
372 __str__ = __unicode__
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000373
374 def _eq(self, other):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000375 """Compare two nodes for equality."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000376 return (self.type, self.value) == (other.type, other.value)
377
378 def clone(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000379 """Return a cloned (deep) copy of self."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000380 return Leaf(self.type, self.value,
381 (self.prefix, (self.lineno, self.column)))
382
383 def post_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000384 """Return a post-order iterator for the tree."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000385 yield self
386
387 def pre_order(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000388 """Return a pre-order iterator for the tree."""
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000389 yield self
390
Benjamin Peterson61180402009-06-11 22:06:46 +0000391 @property
392 def prefix(self):
393 """
394 The whitespace and comments preceding this token in the input.
395 """
396 return self._prefix
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000397
Benjamin Peterson61180402009-06-11 22:06:46 +0000398 @prefix.setter
399 def prefix(self, prefix):
400 self.changed()
401 self._prefix = prefix
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000402
403
404def convert(gr, raw_node):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000405 """
406 Convert raw node information to a Node or Leaf instance.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000407
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000408 This is passed to the parser driver which calls it whenever a reduction of a
409 grammar rule produces a new complete node, so that the tree is build
410 strictly bottom-up.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000411 """
412 type, value, context, children = raw_node
413 if children or type in gr.number2symbol:
414 # If there's exactly one child, return that child instead of
415 # creating a new node.
416 if len(children) == 1:
417 return children[0]
418 return Node(type, children, context=context)
419 else:
420 return Leaf(type, value, context=context)
421
422
423class BasePattern(object):
424
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000425 """
426 A pattern is a tree matching pattern.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000427
428 It looks for a specific node type (token or symbol), and
429 optionally for a specific content.
430
431 This is an abstract base class. There are three concrete
432 subclasses:
433
434 - LeafPattern matches a single leaf node;
435 - NodePattern matches a single node (usually non-leaf);
436 - WildcardPattern matches a sequence of nodes of variable length.
437 """
438
439 # Defaults for instance variables
440 type = None # Node type (token if < 256, symbol if >= 256)
441 content = None # Optional content matching pattern
442 name = None # Optional name used to store match in results dict
443
444 def __new__(cls, *args, **kwds):
445 """Constructor that prevents BasePattern from being instantiated."""
446 assert cls is not BasePattern, "Cannot instantiate BasePattern"
447 return object.__new__(cls)
448
449 def __repr__(self):
Martin v. Löwisbaf267c2008-03-22 00:01:12 +0000450 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000451 while args and args[-1] is None:
452 del args[-1]
453 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
454
455 def optimize(self):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000456 """
457 A subclass can define this as a hook for optimizations.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000458
459 Returns either self or another node with the same effect.
460 """
461 return self
462
463 def match(self, node, results=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000464 """
465 Does this pattern exactly match a node?
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000466
467 Returns True if it matches, False if not.
468
469 If results is not None, it must be a dict which will be
470 updated with the nodes matching named subpatterns.
471
472 Default implementation for non-wildcard patterns.
473 """
474 if self.type is not None and node.type != self.type:
475 return False
476 if self.content is not None:
477 r = None
478 if results is not None:
479 r = {}
480 if not self._submatch(node, r):
481 return False
482 if r:
483 results.update(r)
484 if results is not None and self.name:
485 results[self.name] = node
486 return True
487
488 def match_seq(self, nodes, results=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000489 """
490 Does this pattern exactly match a sequence of nodes?
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000491
492 Default implementation for non-wildcard patterns.
493 """
494 if len(nodes) != 1:
495 return False
496 return self.match(nodes[0], results)
497
498 def generate_matches(self, nodes):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000499 """
500 Generator yielding all matches for this pattern.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000501
502 Default implementation for non-wildcard patterns.
503 """
504 r = {}
505 if nodes and self.match(nodes[0], r):
506 yield 1, r
507
508
509class LeafPattern(BasePattern):
510
511 def __init__(self, type=None, content=None, name=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000512 """
513 Initializer. Takes optional type, content, and name.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000514
515 The type, if given must be a token type (< 256). If not given,
516 this matches any *leaf* node; the content may still be required.
517
518 The content, if given, must be a string.
519
520 If a name is given, the matching node is stored in the results
521 dict under that key.
522 """
523 if type is not None:
524 assert 0 <= type < 256, type
525 if content is not None:
526 assert isinstance(content, basestring), repr(content)
527 self.type = type
528 self.content = content
529 self.name = name
530
531 def match(self, node, results=None):
532 """Override match() to insist on a leaf node."""
533 if not isinstance(node, Leaf):
534 return False
535 return BasePattern.match(self, node, results)
536
537 def _submatch(self, node, results=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000538 """
539 Match the pattern's content to the node's children.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000540
541 This assumes the node type matches and self.content is not None.
542
543 Returns True if it matches, False if not.
544
545 If results is not None, it must be a dict which will be
546 updated with the nodes matching named subpatterns.
547
548 When returning False, the results dict may still be updated.
549 """
550 return self.content == node.value
551
552
553class NodePattern(BasePattern):
554
555 wildcards = False
556
557 def __init__(self, type=None, content=None, name=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000558 """
559 Initializer. Takes optional type, content, and name.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000560
561 The type, if given, must be a symbol type (>= 256). If the
562 type is None this matches *any* single node (leaf or not),
563 except if content is not None, in which it only matches
564 non-leaf nodes that also match the content pattern.
565
566 The content, if not None, must be a sequence of Patterns that
567 must match the node's children exactly. If the content is
568 given, the type must not be None.
569
570 If a name is given, the matching node is stored in the results
571 dict under that key.
572 """
573 if type is not None:
574 assert type >= 256, type
575 if content is not None:
576 assert not isinstance(content, basestring), repr(content)
577 content = list(content)
578 for i, item in enumerate(content):
579 assert isinstance(item, BasePattern), (i, item)
580 if isinstance(item, WildcardPattern):
581 self.wildcards = True
582 self.type = type
583 self.content = content
584 self.name = name
585
586 def _submatch(self, node, results=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000587 """
588 Match the pattern's content to the node's children.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000589
590 This assumes the node type matches and self.content is not None.
591
592 Returns True if it matches, False if not.
593
594 If results is not None, it must be a dict which will be
595 updated with the nodes matching named subpatterns.
596
597 When returning False, the results dict may still be updated.
598 """
599 if self.wildcards:
600 for c, r in generate_matches(self.content, node.children):
601 if c == len(node.children):
602 if results is not None:
603 results.update(r)
604 return True
605 return False
606 if len(self.content) != len(node.children):
607 return False
608 for subpattern, child in zip(self.content, node.children):
609 if not subpattern.match(child, results):
610 return False
611 return True
612
613
614class WildcardPattern(BasePattern):
615
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000616 """
617 A wildcard pattern can match zero or more nodes.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000618
619 This has all the flexibility needed to implement patterns like:
620
621 .* .+ .? .{m,n}
622 (a b c | d e | f)
623 (...)* (...)+ (...)? (...){m,n}
624
625 except it always uses non-greedy matching.
626 """
627
628 def __init__(self, content=None, min=0, max=HUGE, name=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000629 """
630 Initializer.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000631
632 Args:
633 content: optional sequence of subsequences of patterns;
634 if absent, matches one node;
635 if present, each subsequence is an alternative [*]
636 min: optinal minumum number of times to match, default 0
637 max: optional maximum number of times tro match, default HUGE
638 name: optional name assigned to this match
639
640 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
641 equivalent to (a b c | d e | f g h); if content is None,
642 this is equivalent to '.' in regular expression terms.
643 The min and max parameters work as follows:
644 min=0, max=maxint: .*
645 min=1, max=maxint: .+
646 min=0, max=1: .?
647 min=1, max=1: .
648 If content is not None, replace the dot with the parenthesized
649 list of alternatives, e.g. (a b c | d e | f g h)*
650 """
651 assert 0 <= min <= max <= HUGE, (min, max)
652 if content is not None:
653 content = tuple(map(tuple, content)) # Protect against alterations
654 # Check sanity of alternatives
655 assert len(content), repr(content) # Can't have zero alternatives
656 for alt in content:
657 assert len(alt), repr(alt) # Can have empty alternatives
658 self.content = content
659 self.min = min
660 self.max = max
661 self.name = name
662
663 def optimize(self):
664 """Optimize certain stacked wildcard patterns."""
665 subpattern = None
666 if (self.content is not None and
667 len(self.content) == 1 and len(self.content[0]) == 1):
668 subpattern = self.content[0][0]
669 if self.min == 1 and self.max == 1:
670 if self.content is None:
671 return NodePattern(name=self.name)
672 if subpattern is not None and self.name == subpattern.name:
673 return subpattern.optimize()
674 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
675 subpattern.min <= 1 and self.name == subpattern.name):
676 return WildcardPattern(subpattern.content,
677 self.min*subpattern.min,
678 self.max*subpattern.max,
679 subpattern.name)
680 return self
681
682 def match(self, node, results=None):
683 """Does this pattern exactly match a node?"""
684 return self.match_seq([node], results)
685
686 def match_seq(self, nodes, results=None):
687 """Does this pattern exactly match a sequence of nodes?"""
688 for c, r in self.generate_matches(nodes):
689 if c == len(nodes):
690 if results is not None:
691 results.update(r)
692 if self.name:
693 results[self.name] = list(nodes)
694 return True
695 return False
696
697 def generate_matches(self, nodes):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000698 """
699 Generator yielding matches for a sequence of nodes.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000700
701 Args:
702 nodes: sequence of nodes
703
704 Yields:
705 (count, results) tuples where:
706 count: the match comprises nodes[:count];
707 results: dict containing named submatches.
708 """
709 if self.content is None:
710 # Shortcut for special case (see __init__.__doc__)
711 for count in xrange(self.min, 1 + min(len(nodes), self.max)):
712 r = {}
713 if self.name:
714 r[self.name] = nodes[:count]
715 yield count, r
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000716 elif self.name == "bare_name":
717 yield self._bare_name_matches(nodes)
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000718 else:
Benjamin Peterson2555d9d2008-11-28 22:12:14 +0000719 # The reason for this is that hitting the recursion limit usually
720 # results in some ugly messages about how RuntimeErrors are being
721 # ignored.
722 save_stderr = sys.stderr
723 sys.stderr = StringIO()
Benjamin Peterson4d164152008-10-04 20:55:50 +0000724 try:
725 for count, r in self._recursive_matches(nodes, 0):
726 if self.name:
727 r[self.name] = nodes[:count]
728 yield count, r
729 except RuntimeError:
730 # We fall back to the iterative pattern matching scheme if the recursive
731 # scheme hits the recursion limit.
732 for count, r in self._iterative_matches(nodes):
733 if self.name:
734 r[self.name] = nodes[:count]
735 yield count, r
Benjamin Peterson2555d9d2008-11-28 22:12:14 +0000736 finally:
737 sys.stderr = save_stderr
Benjamin Peterson4d164152008-10-04 20:55:50 +0000738
739 def _iterative_matches(self, nodes):
740 """Helper to iteratively yield the matches."""
741 nodelen = len(nodes)
742 if 0 >= self.min:
743 yield 0, {}
744
745 results = []
746 # generate matches that use just one alt from self.content
747 for alt in self.content:
748 for c, r in generate_matches(alt, nodes):
749 yield c, r
750 results.append((c, r))
751
752 # for each match, iterate down the nodes
753 while results:
754 new_results = []
755 for c0, r0 in results:
756 # stop if the entire set of nodes has been matched
757 if c0 < nodelen and c0 <= self.max:
758 for alt in self.content:
759 for c1, r1 in generate_matches(alt, nodes[c0:]):
760 if c1 > 0:
761 r = {}
762 r.update(r0)
763 r.update(r1)
764 yield c0 + c1, r
765 new_results.append((c0 + c1, r))
766 results = new_results
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000767
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000768 def _bare_name_matches(self, nodes):
769 """Special optimized matcher for bare_name."""
770 count = 0
771 r = {}
772 done = False
773 max = len(nodes)
774 while not done and count < max:
775 done = True
776 for leaf in self.content:
777 if leaf[0].match(nodes[count], r):
778 count += 1
779 done = False
780 break
781 r[self.name] = nodes[:count]
782 return count, r
783
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000784 def _recursive_matches(self, nodes, count):
785 """Helper to recursively yield the matches."""
786 assert self.content is not None
787 if count >= self.min:
Benjamin Peterson1ab31492008-07-17 02:07:46 +0000788 yield 0, {}
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000789 if count < self.max:
790 for alt in self.content:
791 for c0, r0 in generate_matches(alt, nodes):
792 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
793 r = {}
794 r.update(r0)
795 r.update(r1)
796 yield c0 + c1, r
797
798
799class NegatedPattern(BasePattern):
800
801 def __init__(self, content=None):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000802 """
803 Initializer.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000804
805 The argument is either a pattern or None. If it is None, this
806 only matches an empty sequence (effectively '$' in regex
807 lingo). If it is not None, this matches whenever the argument
808 pattern doesn't have any matches.
809 """
810 if content is not None:
811 assert isinstance(content, BasePattern), repr(content)
812 self.content = content
813
814 def match(self, node):
815 # We never match a node in its entirety
816 return False
817
818 def match_seq(self, nodes):
819 # We only match an empty sequence of nodes in its entirety
820 return len(nodes) == 0
821
822 def generate_matches(self, nodes):
823 if self.content is None:
824 # Return a match if there is an empty sequence
825 if len(nodes) == 0:
826 yield 0, {}
827 else:
828 # Return a match if the argument pattern has no matches
829 for c, r in self.content.generate_matches(nodes):
830 return
831 yield 0, {}
832
833
834def generate_matches(patterns, nodes):
Benjamin Petersoneaeb4c62009-05-05 23:13:58 +0000835 """
836 Generator yielding matches for a sequence of patterns and nodes.
Martin v. Löwis5e37bae2008-03-19 04:43:46 +0000837
838 Args:
839 patterns: a sequence of patterns
840 nodes: a sequence of nodes
841
842 Yields:
843 (count, results) tuples where:
844 count: the entire sequence of patterns matches nodes[:count];
845 results: dict containing named submatches.
846 """
847 if not patterns:
848 yield 0, {}
849 else:
850 p, rest = patterns[0], patterns[1:]
851 for c0, r0 in p.generate_matches(nodes):
852 if not rest:
853 yield c0, r0
854 else:
855 for c1, r1 in generate_matches(rest, nodes[c0:]):
856 r = {}
857 r.update(r0)
858 r.update(r1)
859 yield c0 + c1, r