blob: 15b83f6d02841fe9065aa47cce324afc92d617f8 [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
Benjamin Peterson608d8bc2009-05-05 23:23:31 +00004"""
5Python parse tree definitions.
Martin v. Löwisef04c442008-03-19 05:04:44 +00006
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
Benjamin Peterson5cff9312008-11-28 23:01:28 +000015import sys
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +000016import warnings
Benjamin Peterson5cff9312008-11-28 23:01:28 +000017from io import StringIO
18
Martin v. Löwisef04c442008-03-19 05:04:44 +000019
20HUGE = 0x7FFFFFFF # maximum repeat count, default max
21
Martin v. Löwis3faa84f2008-03-22 00:07:09 +000022_type_reprs = {}
23def type_repr(type_num):
24 global _type_reprs
25 if not _type_reprs:
26 from .pygram import python_symbols
27 # printing tokens is possible but not as useful
28 # from .pgen2 import token // token.__dict__.items():
29 for name, val in python_symbols.__dict__.items():
30 if type(val) == int: _type_reprs[val] = name
31 return _type_reprs.setdefault(type_num, type_num)
32
Martin v. Löwisef04c442008-03-19 05:04:44 +000033
34class Base(object):
35
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000036 """
37 Abstract base class for Node and Leaf.
Martin v. Löwisef04c442008-03-19 05:04:44 +000038
39 This provides some default functionality and boilerplate using the
40 template pattern.
41
42 A node may be a subnode of at most one parent.
43 """
44
45 # Default values for instance variables
46 type = None # int: token number (< 256) or symbol number (>= 256)
47 parent = None # Parent node pointer, or None
48 children = () # Tuple of subnodes
49 was_changed = False
50
51 def __new__(cls, *args, **kwds):
52 """Constructor that prevents Base from being instantiated."""
53 assert cls is not Base, "Cannot instantiate Base"
54 return object.__new__(cls)
55
56 def __eq__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000057 """
58 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000059
60 This calls the method _eq().
61 """
62 if self.__class__ is not other.__class__:
63 return NotImplemented
64 return self._eq(other)
65
Benjamin Petersond9af52b2009-11-02 18:16:28 +000066 __hash__ = None # For Py3 compatibility.
67
Martin v. Löwisef04c442008-03-19 05:04:44 +000068 def __ne__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000069 """
70 Compare two nodes for inequality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000071
72 This calls the method _eq().
73 """
74 if self.__class__ is not other.__class__:
75 return NotImplemented
76 return not self._eq(other)
77
78 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000079 """
80 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000081
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000082 This is called by __eq__ and __ne__. It is only called if the two nodes
83 have the same type. This must be implemented by the concrete subclass.
84 Nodes should be considered equal if they have the same structure,
85 ignoring the prefix string and other context information.
Martin v. Löwisef04c442008-03-19 05:04:44 +000086 """
87 raise NotImplementedError
88
89 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000090 """
91 Return a cloned (deep) copy of self.
Martin v. Löwisef04c442008-03-19 05:04:44 +000092
93 This must be implemented by the concrete subclass.
94 """
95 raise NotImplementedError
96
97 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000098 """
99 Return a post-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000100
101 This must be implemented by the concrete subclass.
102 """
103 raise NotImplementedError
104
105 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000106 """
107 Return a pre-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000108
109 This must be implemented by the concrete subclass.
110 """
111 raise NotImplementedError
112
113 def set_prefix(self, prefix):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000114 """
115 Set the prefix for the node (see Leaf class).
Martin v. Löwisef04c442008-03-19 05:04:44 +0000116
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000117 DEPRECATED; use the prefix property directly.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000118 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000119 warnings.warn("set_prefix() is deprecated; use the prefix property",
120 DeprecationWarning, stacklevel=2)
121 self.prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000122
123 def get_prefix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000124 """
125 Return the prefix for the node (see Leaf class).
Martin v. Löwisef04c442008-03-19 05:04:44 +0000126
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000127 DEPRECATED; use the prefix property directly.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000128 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000129 warnings.warn("get_prefix() is deprecated; use the prefix property",
130 DeprecationWarning, stacklevel=2)
131 return self.prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000132
133 def replace(self, new):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000134 """Replace this node with a new one in the parent."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000135 assert self.parent is not None, str(self)
136 assert new is not None
137 if not isinstance(new, list):
138 new = [new]
139 l_children = []
140 found = False
141 for ch in self.parent.children:
142 if ch is self:
143 assert not found, (self.parent.children, self, new)
144 if new is not None:
145 l_children.extend(new)
146 found = True
147 else:
148 l_children.append(ch)
149 assert found, (self.children, self, new)
150 self.parent.changed()
151 self.parent.children = l_children
152 for x in new:
153 x.parent = self.parent
154 self.parent = None
155
156 def get_lineno(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000157 """Return the line number which generated the invocant node."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000158 node = self
159 while not isinstance(node, Leaf):
160 if not node.children:
161 return
162 node = node.children[0]
163 return node.lineno
164
165 def changed(self):
166 if self.parent:
167 self.parent.changed()
168 self.was_changed = True
169
170 def remove(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000171 """
172 Remove the node from the tree. Returns the position of the node in its
173 parent's children before it was removed.
174 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000175 if self.parent:
176 for i, node in enumerate(self.parent.children):
177 if node is self:
178 self.parent.changed()
179 del self.parent.children[i]
180 self.parent = None
181 return i
182
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000183 @property
184 def next_sibling(self):
185 """
186 The node immediately following the invocant in their parent's children
187 list. If the invocant does not have a next sibling, it is None
188 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000189 if self.parent is None:
190 return None
191
192 # Can't use index(); we need to test by identity
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000193 for i, child in enumerate(self.parent.children):
194 if child is self:
Martin v. Löwisef04c442008-03-19 05:04:44 +0000195 try:
196 return self.parent.children[i+1]
197 except IndexError:
198 return None
199
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000200 @property
201 def prev_sibling(self):
202 """
203 The node immediately preceding the invocant in their parent's children
204 list. If the invocant does not have a previous sibling, it is None.
205 """
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000206 if self.parent is None:
207 return None
208
209 # Can't use index(); we need to test by identity
210 for i, child in enumerate(self.parent.children):
211 if child is self:
212 if i == 0:
213 return None
214 return self.parent.children[i-1]
215
Martin v. Löwisef04c442008-03-19 05:04:44 +0000216 def get_suffix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000217 """
218 Return the string immediately following the invocant node. This is
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000219 effectively equivalent to node.next_sibling.prefix
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000220 """
221 next_sib = self.next_sibling
Martin v. Löwisef04c442008-03-19 05:04:44 +0000222 if next_sib is None:
223 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000224 return next_sib.prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000225
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000226 if sys.version_info < (3, 0):
227 def __str__(self):
228 return str(self).encode("ascii")
229
Martin v. Löwisef04c442008-03-19 05:04:44 +0000230
231class Node(Base):
232
233 """Concrete implementation for interior nodes."""
234
235 def __init__(self, type, children, context=None, prefix=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000236 """
237 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000238
239 Takes a type constant (a symbol number >= 256), a sequence of
240 child nodes, and an optional context keyword argument.
241
242 As a side effect, the parent pointers of the children are updated.
243 """
244 assert type >= 256, type
245 self.type = type
246 self.children = list(children)
247 for ch in self.children:
248 assert ch.parent is None, repr(ch)
249 ch.parent = self
250 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000251 self.prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000252
253 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000254 """Return a canonical string representation."""
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000255 return "%s(%s, %r)" % (self.__class__.__name__,
256 type_repr(self.type),
Martin v. Löwisef04c442008-03-19 05:04:44 +0000257 self.children)
258
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000259 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000260 """
261 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000262
263 This reproduces the input source exactly.
264 """
265 return "".join(map(str, self.children))
266
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000267 if sys.version_info > (3, 0):
268 __str__ = __unicode__
269
Martin v. Löwisef04c442008-03-19 05:04:44 +0000270 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000271 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000272 return (self.type, self.children) == (other.type, other.children)
273
274 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000275 """Return a cloned (deep) copy of self."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000276 return Node(self.type, [ch.clone() for ch in self.children])
277
278 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000279 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000280 for child in self.children:
281 for node in child.post_order():
282 yield node
283 yield self
284
285 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000286 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000287 yield self
288 for child in self.children:
289 for node in child.post_order():
290 yield node
291
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000292 @property
293 def prefix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000294 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000295 The whitespace and comments preceding this node in the input.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000296 """
297 if not self.children:
298 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000299 return self.children[0].prefix
300
301 @prefix.setter
302 def prefix(self, prefix):
303 if self.children:
304 self.children[0].prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000305
306 def set_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000307 """
308 Equivalent to 'node.children[i] = child'. This method also sets the
309 child's parent attribute appropriately.
310 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000311 child.parent = self
312 self.children[i].parent = None
313 self.children[i] = child
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000314 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000315
316 def insert_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000317 """
318 Equivalent to 'node.children.insert(i, child)'. This method also sets
319 the child's parent attribute appropriately.
320 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000321 child.parent = self
322 self.children.insert(i, child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000323 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000324
325 def append_child(self, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000326 """
327 Equivalent to 'node.children.append(child)'. This method also sets the
328 child's parent attribute appropriately.
329 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000330 child.parent = self
331 self.children.append(child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000332 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000333
334
335class Leaf(Base):
336
337 """Concrete implementation for leaf nodes."""
338
339 # Default values for instance variables
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000340 _prefix = "" # Whitespace and comments preceding this token in the input
341 lineno = 0 # Line where this token starts in the input
342 column = 0 # Column where this token tarts in the input
Martin v. Löwisef04c442008-03-19 05:04:44 +0000343
344 def __init__(self, type, value, context=None, prefix=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000345 """
346 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000347
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000348 Takes a type constant (a token number < 256), a string value, and an
349 optional context keyword argument.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000350 """
351 assert 0 <= type < 256, type
352 if context is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000353 self._prefix, (self.lineno, self.column) = context
Martin v. Löwisef04c442008-03-19 05:04:44 +0000354 self.type = type
355 self.value = value
356 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000357 self._prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000358
359 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000360 """Return a canonical string representation."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000361 return "%s(%r, %r)" % (self.__class__.__name__,
362 self.type,
363 self.value)
364
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000365 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000366 """
367 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000368
369 This reproduces the input source exactly.
370 """
371 return self.prefix + str(self.value)
372
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000373 if sys.version_info > (3, 0):
374 __str__ = __unicode__
375
Martin v. Löwisef04c442008-03-19 05:04:44 +0000376 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000377 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000378 return (self.type, self.value) == (other.type, other.value)
379
380 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000381 """Return a cloned (deep) copy of self."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000382 return Leaf(self.type, self.value,
383 (self.prefix, (self.lineno, self.column)))
384
385 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000386 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000387 yield self
388
389 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000390 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000391 yield self
392
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000393 @property
394 def prefix(self):
395 """
396 The whitespace and comments preceding this token in the input.
397 """
398 return self._prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000399
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000400 @prefix.setter
401 def prefix(self, prefix):
402 self.changed()
403 self._prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000404
405
406def convert(gr, raw_node):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000407 """
408 Convert raw node information to a Node or Leaf instance.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000409
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000410 This is passed to the parser driver which calls it whenever a reduction of a
411 grammar rule produces a new complete node, so that the tree is build
412 strictly bottom-up.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000413 """
414 type, value, context, children = raw_node
415 if children or type in gr.number2symbol:
416 # If there's exactly one child, return that child instead of
417 # creating a new node.
418 if len(children) == 1:
419 return children[0]
420 return Node(type, children, context=context)
421 else:
422 return Leaf(type, value, context=context)
423
424
425class BasePattern(object):
426
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000427 """
428 A pattern is a tree matching pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000429
430 It looks for a specific node type (token or symbol), and
431 optionally for a specific content.
432
433 This is an abstract base class. There are three concrete
434 subclasses:
435
436 - LeafPattern matches a single leaf node;
437 - NodePattern matches a single node (usually non-leaf);
438 - WildcardPattern matches a sequence of nodes of variable length.
439 """
440
441 # Defaults for instance variables
442 type = None # Node type (token if < 256, symbol if >= 256)
443 content = None # Optional content matching pattern
444 name = None # Optional name used to store match in results dict
445
446 def __new__(cls, *args, **kwds):
447 """Constructor that prevents BasePattern from being instantiated."""
448 assert cls is not BasePattern, "Cannot instantiate BasePattern"
449 return object.__new__(cls)
450
451 def __repr__(self):
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000452 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000453 while args and args[-1] is None:
454 del args[-1]
455 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
456
457 def optimize(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000458 """
459 A subclass can define this as a hook for optimizations.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000460
461 Returns either self or another node with the same effect.
462 """
463 return self
464
465 def match(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000466 """
467 Does this pattern exactly match a node?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000468
469 Returns True if it matches, False if not.
470
471 If results is not None, it must be a dict which will be
472 updated with the nodes matching named subpatterns.
473
474 Default implementation for non-wildcard patterns.
475 """
476 if self.type is not None and node.type != self.type:
477 return False
478 if self.content is not None:
479 r = None
480 if results is not None:
481 r = {}
482 if not self._submatch(node, r):
483 return False
484 if r:
485 results.update(r)
486 if results is not None and self.name:
487 results[self.name] = node
488 return True
489
490 def match_seq(self, nodes, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000491 """
492 Does this pattern exactly match a sequence of nodes?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000493
494 Default implementation for non-wildcard patterns.
495 """
496 if len(nodes) != 1:
497 return False
498 return self.match(nodes[0], results)
499
500 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000501 """
502 Generator yielding all matches for this pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000503
504 Default implementation for non-wildcard patterns.
505 """
506 r = {}
507 if nodes and self.match(nodes[0], r):
508 yield 1, r
509
510
511class LeafPattern(BasePattern):
512
513 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000514 """
515 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000516
517 The type, if given must be a token type (< 256). If not given,
518 this matches any *leaf* node; the content may still be required.
519
520 The content, if given, must be a string.
521
522 If a name is given, the matching node is stored in the results
523 dict under that key.
524 """
525 if type is not None:
526 assert 0 <= type < 256, type
527 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000528 assert isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000529 self.type = type
530 self.content = content
531 self.name = name
532
533 def match(self, node, results=None):
534 """Override match() to insist on a leaf node."""
535 if not isinstance(node, Leaf):
536 return False
537 return BasePattern.match(self, node, results)
538
539 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000540 """
541 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000542
543 This assumes the node type matches and self.content is not None.
544
545 Returns True if it matches, False if not.
546
547 If results is not None, it must be a dict which will be
548 updated with the nodes matching named subpatterns.
549
550 When returning False, the results dict may still be updated.
551 """
552 return self.content == node.value
553
554
555class NodePattern(BasePattern):
556
557 wildcards = False
558
559 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000560 """
561 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000562
563 The type, if given, must be a symbol type (>= 256). If the
564 type is None this matches *any* single node (leaf or not),
565 except if content is not None, in which it only matches
566 non-leaf nodes that also match the content pattern.
567
568 The content, if not None, must be a sequence of Patterns that
569 must match the node's children exactly. If the content is
570 given, the type must not be None.
571
572 If a name is given, the matching node is stored in the results
573 dict under that key.
574 """
575 if type is not None:
576 assert type >= 256, type
577 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000578 assert not isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000579 content = list(content)
580 for i, item in enumerate(content):
581 assert isinstance(item, BasePattern), (i, item)
582 if isinstance(item, WildcardPattern):
583 self.wildcards = True
584 self.type = type
585 self.content = content
586 self.name = name
587
588 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000589 """
590 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000591
592 This assumes the node type matches and self.content is not None.
593
594 Returns True if it matches, False if not.
595
596 If results is not None, it must be a dict which will be
597 updated with the nodes matching named subpatterns.
598
599 When returning False, the results dict may still be updated.
600 """
601 if self.wildcards:
602 for c, r in generate_matches(self.content, node.children):
603 if c == len(node.children):
604 if results is not None:
605 results.update(r)
606 return True
607 return False
608 if len(self.content) != len(node.children):
609 return False
610 for subpattern, child in zip(self.content, node.children):
611 if not subpattern.match(child, results):
612 return False
613 return True
614
615
616class WildcardPattern(BasePattern):
617
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000618 """
619 A wildcard pattern can match zero or more nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000620
621 This has all the flexibility needed to implement patterns like:
622
623 .* .+ .? .{m,n}
624 (a b c | d e | f)
625 (...)* (...)+ (...)? (...){m,n}
626
627 except it always uses non-greedy matching.
628 """
629
630 def __init__(self, content=None, min=0, max=HUGE, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000631 """
632 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000633
634 Args:
635 content: optional sequence of subsequences of patterns;
636 if absent, matches one node;
637 if present, each subsequence is an alternative [*]
638 min: optinal minumum number of times to match, default 0
639 max: optional maximum number of times tro match, default HUGE
640 name: optional name assigned to this match
641
642 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
643 equivalent to (a b c | d e | f g h); if content is None,
644 this is equivalent to '.' in regular expression terms.
645 The min and max parameters work as follows:
646 min=0, max=maxint: .*
647 min=1, max=maxint: .+
648 min=0, max=1: .?
649 min=1, max=1: .
650 If content is not None, replace the dot with the parenthesized
651 list of alternatives, e.g. (a b c | d e | f g h)*
652 """
653 assert 0 <= min <= max <= HUGE, (min, max)
654 if content is not None:
655 content = tuple(map(tuple, content)) # Protect against alterations
656 # Check sanity of alternatives
657 assert len(content), repr(content) # Can't have zero alternatives
658 for alt in content:
659 assert len(alt), repr(alt) # Can have empty alternatives
660 self.content = content
661 self.min = min
662 self.max = max
663 self.name = name
664
665 def optimize(self):
666 """Optimize certain stacked wildcard patterns."""
667 subpattern = None
668 if (self.content is not None and
669 len(self.content) == 1 and len(self.content[0]) == 1):
670 subpattern = self.content[0][0]
671 if self.min == 1 and self.max == 1:
672 if self.content is None:
673 return NodePattern(name=self.name)
674 if subpattern is not None and self.name == subpattern.name:
675 return subpattern.optimize()
676 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
677 subpattern.min <= 1 and self.name == subpattern.name):
678 return WildcardPattern(subpattern.content,
679 self.min*subpattern.min,
680 self.max*subpattern.max,
681 subpattern.name)
682 return self
683
684 def match(self, node, results=None):
685 """Does this pattern exactly match a node?"""
686 return self.match_seq([node], results)
687
688 def match_seq(self, nodes, results=None):
689 """Does this pattern exactly match a sequence of nodes?"""
690 for c, r in self.generate_matches(nodes):
691 if c == len(nodes):
692 if results is not None:
693 results.update(r)
694 if self.name:
695 results[self.name] = list(nodes)
696 return True
697 return False
698
699 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000700 """
701 Generator yielding matches for a sequence of nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000702
703 Args:
704 nodes: sequence of nodes
705
706 Yields:
707 (count, results) tuples where:
708 count: the match comprises nodes[:count];
709 results: dict containing named submatches.
710 """
711 if self.content is None:
712 # Shortcut for special case (see __init__.__doc__)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000713 for count in range(self.min, 1 + min(len(nodes), self.max)):
Martin v. Löwisef04c442008-03-19 05:04:44 +0000714 r = {}
715 if self.name:
716 r[self.name] = nodes[:count]
717 yield count, r
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000718 elif self.name == "bare_name":
719 yield self._bare_name_matches(nodes)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000720 else:
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000721 # The reason for this is that hitting the recursion limit usually
722 # results in some ugly messages about how RuntimeErrors are being
723 # ignored.
724 save_stderr = sys.stderr
725 sys.stderr = StringIO()
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000726 try:
727 for count, r in self._recursive_matches(nodes, 0):
728 if self.name:
729 r[self.name] = nodes[:count]
730 yield count, r
731 except RuntimeError:
732 # We fall back to the iterative pattern matching scheme if the recursive
733 # scheme hits the recursion limit.
734 for count, r in self._iterative_matches(nodes):
735 if self.name:
736 r[self.name] = nodes[:count]
737 yield count, r
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000738 finally:
739 sys.stderr = save_stderr
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000740
741 def _iterative_matches(self, nodes):
742 """Helper to iteratively yield the matches."""
743 nodelen = len(nodes)
744 if 0 >= self.min:
745 yield 0, {}
746
747 results = []
748 # generate matches that use just one alt from self.content
749 for alt in self.content:
750 for c, r in generate_matches(alt, nodes):
751 yield c, r
752 results.append((c, r))
753
754 # for each match, iterate down the nodes
755 while results:
756 new_results = []
757 for c0, r0 in results:
758 # stop if the entire set of nodes has been matched
759 if c0 < nodelen and c0 <= self.max:
760 for alt in self.content:
761 for c1, r1 in generate_matches(alt, nodes[c0:]):
762 if c1 > 0:
763 r = {}
764 r.update(r0)
765 r.update(r1)
766 yield c0 + c1, r
767 new_results.append((c0 + c1, r))
768 results = new_results
Martin v. Löwisef04c442008-03-19 05:04:44 +0000769
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000770 def _bare_name_matches(self, nodes):
771 """Special optimized matcher for bare_name."""
772 count = 0
773 r = {}
774 done = False
775 max = len(nodes)
776 while not done and count < max:
777 done = True
778 for leaf in self.content:
779 if leaf[0].match(nodes[count], r):
780 count += 1
781 done = False
782 break
783 r[self.name] = nodes[:count]
784 return count, r
785
Martin v. Löwisef04c442008-03-19 05:04:44 +0000786 def _recursive_matches(self, nodes, count):
787 """Helper to recursively yield the matches."""
788 assert self.content is not None
789 if count >= self.min:
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000790 yield 0, {}
Martin v. Löwisef04c442008-03-19 05:04:44 +0000791 if count < self.max:
792 for alt in self.content:
793 for c0, r0 in generate_matches(alt, nodes):
794 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
795 r = {}
796 r.update(r0)
797 r.update(r1)
798 yield c0 + c1, r
799
800
801class NegatedPattern(BasePattern):
802
803 def __init__(self, content=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000804 """
805 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000806
807 The argument is either a pattern or None. If it is None, this
808 only matches an empty sequence (effectively '$' in regex
809 lingo). If it is not None, this matches whenever the argument
810 pattern doesn't have any matches.
811 """
812 if content is not None:
813 assert isinstance(content, BasePattern), repr(content)
814 self.content = content
815
816 def match(self, node):
817 # We never match a node in its entirety
818 return False
819
820 def match_seq(self, nodes):
821 # We only match an empty sequence of nodes in its entirety
822 return len(nodes) == 0
823
824 def generate_matches(self, nodes):
825 if self.content is None:
826 # Return a match if there is an empty sequence
827 if len(nodes) == 0:
828 yield 0, {}
829 else:
830 # Return a match if the argument pattern has no matches
831 for c, r in self.content.generate_matches(nodes):
832 return
833 yield 0, {}
834
835
836def generate_matches(patterns, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000837 """
838 Generator yielding matches for a sequence of patterns and nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000839
840 Args:
841 patterns: a sequence of patterns
842 nodes: a sequence of nodes
843
844 Yields:
845 (count, results) tuples where:
846 count: the entire sequence of patterns matches nodes[:count];
847 results: dict containing named submatches.
848 """
849 if not patterns:
850 yield 0, {}
851 else:
852 p, rest = patterns[0], patterns[1:]
853 for c0, r0 in p.generate_matches(nodes):
854 if not rest:
855 yield c0, r0
856 else:
857 for c1, r1 in generate_matches(rest, nodes[c0:]):
858 r = {}
859 r.update(r0)
860 r.update(r1)
861 yield c0 + c1, r