blob: 38fda0c5592f824f03f82cb8f84c6bd90f7ce498 [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
Benjamin Peterson608d8bc2009-05-05 23:23:31 +00004"""
5Python parse tree definitions.
Martin v. Löwisef04c442008-03-19 05:04:44 +00006
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
Benjamin Peterson5cff9312008-11-28 23:01:28 +000015import sys
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +000016import warnings
Benjamin Peterson5cff9312008-11-28 23:01:28 +000017from io import StringIO
18
Martin v. Löwisef04c442008-03-19 05:04:44 +000019
20HUGE = 0x7FFFFFFF # maximum repeat count, default max
21
Martin v. Löwis3faa84f2008-03-22 00:07:09 +000022_type_reprs = {}
23def type_repr(type_num):
24 global _type_reprs
25 if not _type_reprs:
26 from .pygram import python_symbols
27 # printing tokens is possible but not as useful
28 # from .pgen2 import token // token.__dict__.items():
29 for name, val in python_symbols.__dict__.items():
30 if type(val) == int: _type_reprs[val] = name
31 return _type_reprs.setdefault(type_num, type_num)
32
Martin v. Löwisef04c442008-03-19 05:04:44 +000033
34class Base(object):
35
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000036 """
37 Abstract base class for Node and Leaf.
Martin v. Löwisef04c442008-03-19 05:04:44 +000038
39 This provides some default functionality and boilerplate using the
40 template pattern.
41
42 A node may be a subnode of at most one parent.
43 """
44
45 # Default values for instance variables
46 type = None # int: token number (< 256) or symbol number (>= 256)
47 parent = None # Parent node pointer, or None
48 children = () # Tuple of subnodes
49 was_changed = False
50
51 def __new__(cls, *args, **kwds):
52 """Constructor that prevents Base from being instantiated."""
53 assert cls is not Base, "Cannot instantiate Base"
54 return object.__new__(cls)
55
56 def __eq__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000057 """
58 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000059
60 This calls the method _eq().
61 """
62 if self.__class__ is not other.__class__:
63 return NotImplemented
64 return self._eq(other)
65
Benjamin Petersond9af52b2009-11-02 18:16:28 +000066 __hash__ = None # For Py3 compatibility.
67
Martin v. Löwisef04c442008-03-19 05:04:44 +000068 def __ne__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000069 """
70 Compare two nodes for inequality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000071
72 This calls the method _eq().
73 """
74 if self.__class__ is not other.__class__:
75 return NotImplemented
76 return not self._eq(other)
77
78 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000079 """
80 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000081
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000082 This is called by __eq__ and __ne__. It is only called if the two nodes
83 have the same type. This must be implemented by the concrete subclass.
84 Nodes should be considered equal if they have the same structure,
85 ignoring the prefix string and other context information.
Martin v. Löwisef04c442008-03-19 05:04:44 +000086 """
87 raise NotImplementedError
88
89 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000090 """
91 Return a cloned (deep) copy of self.
Martin v. Löwisef04c442008-03-19 05:04:44 +000092
93 This must be implemented by the concrete subclass.
94 """
95 raise NotImplementedError
96
97 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000098 """
99 Return a post-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000100
101 This must be implemented by the concrete subclass.
102 """
103 raise NotImplementedError
104
105 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000106 """
107 Return a pre-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000108
109 This must be implemented by the concrete subclass.
110 """
111 raise NotImplementedError
112
113 def set_prefix(self, prefix):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000114 """
115 Set the prefix for the node (see Leaf class).
Martin v. Löwisef04c442008-03-19 05:04:44 +0000116
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000117 DEPRECATED; use the prefix property directly.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000118 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000119 warnings.warn("set_prefix() is deprecated; use the prefix property",
120 DeprecationWarning, stacklevel=2)
121 self.prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000122
123 def get_prefix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000124 """
125 Return the prefix for the node (see Leaf class).
Martin v. Löwisef04c442008-03-19 05:04:44 +0000126
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000127 DEPRECATED; use the prefix property directly.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000128 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000129 warnings.warn("get_prefix() is deprecated; use the prefix property",
130 DeprecationWarning, stacklevel=2)
131 return self.prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000132
133 def replace(self, new):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000134 """Replace this node with a new one in the parent."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000135 assert self.parent is not None, str(self)
136 assert new is not None
137 if not isinstance(new, list):
138 new = [new]
139 l_children = []
140 found = False
141 for ch in self.parent.children:
142 if ch is self:
143 assert not found, (self.parent.children, self, new)
144 if new is not None:
145 l_children.extend(new)
146 found = True
147 else:
148 l_children.append(ch)
149 assert found, (self.children, self, new)
150 self.parent.changed()
151 self.parent.children = l_children
152 for x in new:
153 x.parent = self.parent
154 self.parent = None
155
156 def get_lineno(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000157 """Return the line number which generated the invocant node."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000158 node = self
159 while not isinstance(node, Leaf):
160 if not node.children:
161 return
162 node = node.children[0]
163 return node.lineno
164
165 def changed(self):
166 if self.parent:
167 self.parent.changed()
168 self.was_changed = True
169
170 def remove(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000171 """
172 Remove the node from the tree. Returns the position of the node in its
173 parent's children before it was removed.
174 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000175 if self.parent:
176 for i, node in enumerate(self.parent.children):
177 if node is self:
178 self.parent.changed()
179 del self.parent.children[i]
180 self.parent = None
181 return i
182
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000183 @property
184 def next_sibling(self):
185 """
186 The node immediately following the invocant in their parent's children
187 list. If the invocant does not have a next sibling, it is None
188 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000189 if self.parent is None:
190 return None
191
192 # Can't use index(); we need to test by identity
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000193 for i, child in enumerate(self.parent.children):
194 if child is self:
Martin v. Löwisef04c442008-03-19 05:04:44 +0000195 try:
196 return self.parent.children[i+1]
197 except IndexError:
198 return None
199
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000200 @property
201 def prev_sibling(self):
202 """
203 The node immediately preceding the invocant in their parent's children
204 list. If the invocant does not have a previous sibling, it is None.
205 """
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000206 if self.parent is None:
207 return None
208
209 # Can't use index(); we need to test by identity
210 for i, child in enumerate(self.parent.children):
211 if child is self:
212 if i == 0:
213 return None
214 return self.parent.children[i-1]
215
Martin v. Löwisef04c442008-03-19 05:04:44 +0000216 def get_suffix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000217 """
218 Return the string immediately following the invocant node. This is
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000219 effectively equivalent to node.next_sibling.prefix
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000220 """
221 next_sib = self.next_sibling
Martin v. Löwisef04c442008-03-19 05:04:44 +0000222 if next_sib is None:
223 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000224 return next_sib.prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000225
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000226 if sys.version_info < (3, 0):
227 def __str__(self):
228 return str(self).encode("ascii")
229
Martin v. Löwisef04c442008-03-19 05:04:44 +0000230
231class Node(Base):
232
233 """Concrete implementation for interior nodes."""
234
235 def __init__(self, type, children, context=None, prefix=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000236 """
237 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000238
239 Takes a type constant (a symbol number >= 256), a sequence of
240 child nodes, and an optional context keyword argument.
241
242 As a side effect, the parent pointers of the children are updated.
243 """
244 assert type >= 256, type
245 self.type = type
246 self.children = list(children)
247 for ch in self.children:
248 assert ch.parent is None, repr(ch)
249 ch.parent = self
250 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000251 self.prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000252
253 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000254 """Return a canonical string representation."""
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000255 return "%s(%s, %r)" % (self.__class__.__name__,
256 type_repr(self.type),
Martin v. Löwisef04c442008-03-19 05:04:44 +0000257 self.children)
258
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000259 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000260 """
261 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000262
263 This reproduces the input source exactly.
264 """
265 return "".join(map(str, self.children))
266
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000267 if sys.version_info > (3, 0):
268 __str__ = __unicode__
269
Martin v. Löwisef04c442008-03-19 05:04:44 +0000270 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000271 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000272 return (self.type, self.children) == (other.type, other.children)
273
274 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000275 """Return a cloned (deep) copy of self."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000276 return Node(self.type, [ch.clone() for ch in self.children])
277
278 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000279 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000280 for child in self.children:
281 for node in child.post_order():
282 yield node
283 yield self
284
285 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000286 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000287 yield self
288 for child in self.children:
Benjamin Peterson4eb5fa52010-08-08 19:01:25 +0000289 for node in child.pre_order():
Martin v. Löwisef04c442008-03-19 05:04:44 +0000290 yield node
291
Benjamin Peterson8d26b0b2010-05-07 19:10:11 +0000292 def _prefix_getter(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000293 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000294 The whitespace and comments preceding this node in the input.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000295 """
296 if not self.children:
297 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000298 return self.children[0].prefix
299
Benjamin Peterson8d26b0b2010-05-07 19:10:11 +0000300 def _prefix_setter(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000301 if self.children:
302 self.children[0].prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000303
Benjamin Peterson8d26b0b2010-05-07 19:10:11 +0000304 prefix = property(_prefix_getter, _prefix_setter)
305
Martin v. Löwisef04c442008-03-19 05:04:44 +0000306 def set_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000307 """
308 Equivalent to 'node.children[i] = child'. This method also sets the
309 child's parent attribute appropriately.
310 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000311 child.parent = self
312 self.children[i].parent = None
313 self.children[i] = child
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000314 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000315
316 def insert_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000317 """
318 Equivalent to 'node.children.insert(i, child)'. This method also sets
319 the child's parent attribute appropriately.
320 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000321 child.parent = self
322 self.children.insert(i, child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000323 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000324
325 def append_child(self, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000326 """
327 Equivalent to 'node.children.append(child)'. This method also sets the
328 child's parent attribute appropriately.
329 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000330 child.parent = self
331 self.children.append(child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000332 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000333
334
335class Leaf(Base):
336
337 """Concrete implementation for leaf nodes."""
338
339 # Default values for instance variables
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000340 _prefix = "" # Whitespace and comments preceding this token in the input
341 lineno = 0 # Line where this token starts in the input
342 column = 0 # Column where this token tarts in the input
Martin v. Löwisef04c442008-03-19 05:04:44 +0000343
344 def __init__(self, type, value, context=None, prefix=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000345 """
346 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000347
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000348 Takes a type constant (a token number < 256), a string value, and an
349 optional context keyword argument.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000350 """
351 assert 0 <= type < 256, type
352 if context is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000353 self._prefix, (self.lineno, self.column) = context
Martin v. Löwisef04c442008-03-19 05:04:44 +0000354 self.type = type
355 self.value = value
356 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000357 self._prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000358
359 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000360 """Return a canonical string representation."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000361 return "%s(%r, %r)" % (self.__class__.__name__,
362 self.type,
363 self.value)
364
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000365 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000366 """
367 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000368
369 This reproduces the input source exactly.
370 """
371 return self.prefix + str(self.value)
372
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000373 if sys.version_info > (3, 0):
374 __str__ = __unicode__
375
Martin v. Löwisef04c442008-03-19 05:04:44 +0000376 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000377 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000378 return (self.type, self.value) == (other.type, other.value)
379
380 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000381 """Return a cloned (deep) copy of self."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000382 return Leaf(self.type, self.value,
383 (self.prefix, (self.lineno, self.column)))
384
385 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000386 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000387 yield self
388
389 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000390 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000391 yield self
392
Benjamin Peterson8d26b0b2010-05-07 19:10:11 +0000393 def _prefix_getter(self):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000394 """
395 The whitespace and comments preceding this token in the input.
396 """
397 return self._prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000398
Benjamin Peterson8d26b0b2010-05-07 19:10:11 +0000399 def _prefix_setter(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000400 self.changed()
401 self._prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000402
Benjamin Peterson8d26b0b2010-05-07 19:10:11 +0000403 prefix = property(_prefix_getter, _prefix_setter)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000404
405def convert(gr, raw_node):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000406 """
407 Convert raw node information to a Node or Leaf instance.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000408
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000409 This is passed to the parser driver which calls it whenever a reduction of a
410 grammar rule produces a new complete node, so that the tree is build
411 strictly bottom-up.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000412 """
413 type, value, context, children = raw_node
414 if children or type in gr.number2symbol:
415 # If there's exactly one child, return that child instead of
416 # creating a new node.
417 if len(children) == 1:
418 return children[0]
419 return Node(type, children, context=context)
420 else:
421 return Leaf(type, value, context=context)
422
423
424class BasePattern(object):
425
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000426 """
427 A pattern is a tree matching pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000428
429 It looks for a specific node type (token or symbol), and
430 optionally for a specific content.
431
432 This is an abstract base class. There are three concrete
433 subclasses:
434
435 - LeafPattern matches a single leaf node;
436 - NodePattern matches a single node (usually non-leaf);
437 - WildcardPattern matches a sequence of nodes of variable length.
438 """
439
440 # Defaults for instance variables
441 type = None # Node type (token if < 256, symbol if >= 256)
442 content = None # Optional content matching pattern
443 name = None # Optional name used to store match in results dict
444
445 def __new__(cls, *args, **kwds):
446 """Constructor that prevents BasePattern from being instantiated."""
447 assert cls is not BasePattern, "Cannot instantiate BasePattern"
448 return object.__new__(cls)
449
450 def __repr__(self):
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000451 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000452 while args and args[-1] is None:
453 del args[-1]
454 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
455
456 def optimize(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000457 """
458 A subclass can define this as a hook for optimizations.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000459
460 Returns either self or another node with the same effect.
461 """
462 return self
463
464 def match(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000465 """
466 Does this pattern exactly match a node?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000467
468 Returns True if it matches, False if not.
469
470 If results is not None, it must be a dict which will be
471 updated with the nodes matching named subpatterns.
472
473 Default implementation for non-wildcard patterns.
474 """
475 if self.type is not None and node.type != self.type:
476 return False
477 if self.content is not None:
478 r = None
479 if results is not None:
480 r = {}
481 if not self._submatch(node, r):
482 return False
483 if r:
484 results.update(r)
485 if results is not None and self.name:
486 results[self.name] = node
487 return True
488
489 def match_seq(self, nodes, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000490 """
491 Does this pattern exactly match a sequence of nodes?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000492
493 Default implementation for non-wildcard patterns.
494 """
495 if len(nodes) != 1:
496 return False
497 return self.match(nodes[0], results)
498
499 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000500 """
501 Generator yielding all matches for this pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000502
503 Default implementation for non-wildcard patterns.
504 """
505 r = {}
506 if nodes and self.match(nodes[0], r):
507 yield 1, r
508
509
510class LeafPattern(BasePattern):
511
512 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000513 """
514 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000515
516 The type, if given must be a token type (< 256). If not given,
517 this matches any *leaf* node; the content may still be required.
518
519 The content, if given, must be a string.
520
521 If a name is given, the matching node is stored in the results
522 dict under that key.
523 """
524 if type is not None:
525 assert 0 <= type < 256, type
526 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000527 assert isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000528 self.type = type
529 self.content = content
530 self.name = name
531
532 def match(self, node, results=None):
533 """Override match() to insist on a leaf node."""
534 if not isinstance(node, Leaf):
535 return False
536 return BasePattern.match(self, node, results)
537
538 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000539 """
540 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000541
542 This assumes the node type matches and self.content is not None.
543
544 Returns True if it matches, False if not.
545
546 If results is not None, it must be a dict which will be
547 updated with the nodes matching named subpatterns.
548
549 When returning False, the results dict may still be updated.
550 """
551 return self.content == node.value
552
553
554class NodePattern(BasePattern):
555
556 wildcards = False
557
558 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000559 """
560 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000561
562 The type, if given, must be a symbol type (>= 256). If the
563 type is None this matches *any* single node (leaf or not),
564 except if content is not None, in which it only matches
565 non-leaf nodes that also match the content pattern.
566
567 The content, if not None, must be a sequence of Patterns that
568 must match the node's children exactly. If the content is
569 given, the type must not be None.
570
571 If a name is given, the matching node is stored in the results
572 dict under that key.
573 """
574 if type is not None:
575 assert type >= 256, type
576 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000577 assert not isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000578 content = list(content)
579 for i, item in enumerate(content):
580 assert isinstance(item, BasePattern), (i, item)
581 if isinstance(item, WildcardPattern):
582 self.wildcards = True
583 self.type = type
584 self.content = content
585 self.name = name
586
587 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000588 """
589 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000590
591 This assumes the node type matches and self.content is not None.
592
593 Returns True if it matches, False if not.
594
595 If results is not None, it must be a dict which will be
596 updated with the nodes matching named subpatterns.
597
598 When returning False, the results dict may still be updated.
599 """
600 if self.wildcards:
601 for c, r in generate_matches(self.content, node.children):
602 if c == len(node.children):
603 if results is not None:
604 results.update(r)
605 return True
606 return False
607 if len(self.content) != len(node.children):
608 return False
609 for subpattern, child in zip(self.content, node.children):
610 if not subpattern.match(child, results):
611 return False
612 return True
613
614
615class WildcardPattern(BasePattern):
616
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000617 """
618 A wildcard pattern can match zero or more nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000619
620 This has all the flexibility needed to implement patterns like:
621
622 .* .+ .? .{m,n}
623 (a b c | d e | f)
624 (...)* (...)+ (...)? (...){m,n}
625
626 except it always uses non-greedy matching.
627 """
628
629 def __init__(self, content=None, min=0, max=HUGE, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000630 """
631 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000632
633 Args:
634 content: optional sequence of subsequences of patterns;
635 if absent, matches one node;
636 if present, each subsequence is an alternative [*]
637 min: optinal minumum number of times to match, default 0
638 max: optional maximum number of times tro match, default HUGE
639 name: optional name assigned to this match
640
641 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
642 equivalent to (a b c | d e | f g h); if content is None,
643 this is equivalent to '.' in regular expression terms.
644 The min and max parameters work as follows:
645 min=0, max=maxint: .*
646 min=1, max=maxint: .+
647 min=0, max=1: .?
648 min=1, max=1: .
649 If content is not None, replace the dot with the parenthesized
650 list of alternatives, e.g. (a b c | d e | f g h)*
651 """
652 assert 0 <= min <= max <= HUGE, (min, max)
653 if content is not None:
654 content = tuple(map(tuple, content)) # Protect against alterations
655 # Check sanity of alternatives
656 assert len(content), repr(content) # Can't have zero alternatives
657 for alt in content:
658 assert len(alt), repr(alt) # Can have empty alternatives
659 self.content = content
660 self.min = min
661 self.max = max
662 self.name = name
663
664 def optimize(self):
665 """Optimize certain stacked wildcard patterns."""
666 subpattern = None
667 if (self.content is not None and
668 len(self.content) == 1 and len(self.content[0]) == 1):
669 subpattern = self.content[0][0]
670 if self.min == 1 and self.max == 1:
671 if self.content is None:
672 return NodePattern(name=self.name)
673 if subpattern is not None and self.name == subpattern.name:
674 return subpattern.optimize()
675 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
676 subpattern.min <= 1 and self.name == subpattern.name):
677 return WildcardPattern(subpattern.content,
678 self.min*subpattern.min,
679 self.max*subpattern.max,
680 subpattern.name)
681 return self
682
683 def match(self, node, results=None):
684 """Does this pattern exactly match a node?"""
685 return self.match_seq([node], results)
686
687 def match_seq(self, nodes, results=None):
688 """Does this pattern exactly match a sequence of nodes?"""
689 for c, r in self.generate_matches(nodes):
690 if c == len(nodes):
691 if results is not None:
692 results.update(r)
693 if self.name:
694 results[self.name] = list(nodes)
695 return True
696 return False
697
698 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000699 """
700 Generator yielding matches for a sequence of nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000701
702 Args:
703 nodes: sequence of nodes
704
705 Yields:
706 (count, results) tuples where:
707 count: the match comprises nodes[:count];
708 results: dict containing named submatches.
709 """
710 if self.content is None:
711 # Shortcut for special case (see __init__.__doc__)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000712 for count in range(self.min, 1 + min(len(nodes), self.max)):
Martin v. Löwisef04c442008-03-19 05:04:44 +0000713 r = {}
714 if self.name:
715 r[self.name] = nodes[:count]
716 yield count, r
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000717 elif self.name == "bare_name":
718 yield self._bare_name_matches(nodes)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000719 else:
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000720 # The reason for this is that hitting the recursion limit usually
721 # results in some ugly messages about how RuntimeErrors are being
722 # ignored.
723 save_stderr = sys.stderr
724 sys.stderr = StringIO()
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000725 try:
726 for count, r in self._recursive_matches(nodes, 0):
727 if self.name:
728 r[self.name] = nodes[:count]
729 yield count, r
730 except RuntimeError:
731 # We fall back to the iterative pattern matching scheme if the recursive
732 # scheme hits the recursion limit.
733 for count, r in self._iterative_matches(nodes):
734 if self.name:
735 r[self.name] = nodes[:count]
736 yield count, r
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000737 finally:
738 sys.stderr = save_stderr
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000739
740 def _iterative_matches(self, nodes):
741 """Helper to iteratively yield the matches."""
742 nodelen = len(nodes)
743 if 0 >= self.min:
744 yield 0, {}
745
746 results = []
747 # generate matches that use just one alt from self.content
748 for alt in self.content:
749 for c, r in generate_matches(alt, nodes):
750 yield c, r
751 results.append((c, r))
752
753 # for each match, iterate down the nodes
754 while results:
755 new_results = []
756 for c0, r0 in results:
757 # stop if the entire set of nodes has been matched
758 if c0 < nodelen and c0 <= self.max:
759 for alt in self.content:
760 for c1, r1 in generate_matches(alt, nodes[c0:]):
761 if c1 > 0:
762 r = {}
763 r.update(r0)
764 r.update(r1)
765 yield c0 + c1, r
766 new_results.append((c0 + c1, r))
767 results = new_results
Martin v. Löwisef04c442008-03-19 05:04:44 +0000768
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000769 def _bare_name_matches(self, nodes):
770 """Special optimized matcher for bare_name."""
771 count = 0
772 r = {}
773 done = False
774 max = len(nodes)
775 while not done and count < max:
776 done = True
777 for leaf in self.content:
778 if leaf[0].match(nodes[count], r):
779 count += 1
780 done = False
781 break
782 r[self.name] = nodes[:count]
783 return count, r
784
Martin v. Löwisef04c442008-03-19 05:04:44 +0000785 def _recursive_matches(self, nodes, count):
786 """Helper to recursively yield the matches."""
787 assert self.content is not None
788 if count >= self.min:
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000789 yield 0, {}
Martin v. Löwisef04c442008-03-19 05:04:44 +0000790 if count < self.max:
791 for alt in self.content:
792 for c0, r0 in generate_matches(alt, nodes):
793 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
794 r = {}
795 r.update(r0)
796 r.update(r1)
797 yield c0 + c1, r
798
799
800class NegatedPattern(BasePattern):
801
802 def __init__(self, content=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000803 """
804 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000805
806 The argument is either a pattern or None. If it is None, this
807 only matches an empty sequence (effectively '$' in regex
808 lingo). If it is not None, this matches whenever the argument
809 pattern doesn't have any matches.
810 """
811 if content is not None:
812 assert isinstance(content, BasePattern), repr(content)
813 self.content = content
814
815 def match(self, node):
816 # We never match a node in its entirety
817 return False
818
819 def match_seq(self, nodes):
820 # We only match an empty sequence of nodes in its entirety
821 return len(nodes) == 0
822
823 def generate_matches(self, nodes):
824 if self.content is None:
825 # Return a match if there is an empty sequence
826 if len(nodes) == 0:
827 yield 0, {}
828 else:
829 # Return a match if the argument pattern has no matches
830 for c, r in self.content.generate_matches(nodes):
831 return
832 yield 0, {}
833
834
835def generate_matches(patterns, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000836 """
837 Generator yielding matches for a sequence of patterns and nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000838
839 Args:
840 patterns: a sequence of patterns
841 nodes: a sequence of nodes
842
843 Yields:
844 (count, results) tuples where:
845 count: the entire sequence of patterns matches nodes[:count];
846 results: dict containing named submatches.
847 """
848 if not patterns:
849 yield 0, {}
850 else:
851 p, rest = patterns[0], patterns[1:]
852 for c0, r0 in p.generate_matches(nodes):
853 if not rest:
854 yield c0, r0
855 else:
856 for c1, r1 in generate_matches(rest, nodes[c0:]):
857 r = {}
858 r.update(r0)
859 r.update(r1)
860 yield c0 + c1, r