blob: c4a1be3500b5a11b8dd5fcc14397bbb2dd9f4688 [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
Benjamin Peterson608d8bc2009-05-05 23:23:31 +00004"""
5Python parse tree definitions.
Martin v. Löwisef04c442008-03-19 05:04:44 +00006
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
Benjamin Peterson5cff9312008-11-28 23:01:28 +000015import sys
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +000016import warnings
Benjamin Peterson5cff9312008-11-28 23:01:28 +000017from io import StringIO
18
Martin v. Löwisef04c442008-03-19 05:04:44 +000019HUGE = 0x7FFFFFFF # maximum repeat count, default max
20
Martin v. Löwis3faa84f2008-03-22 00:07:09 +000021_type_reprs = {}
22def type_repr(type_num):
23 global _type_reprs
24 if not _type_reprs:
25 from .pygram import python_symbols
26 # printing tokens is possible but not as useful
27 # from .pgen2 import token // token.__dict__.items():
28 for name, val in python_symbols.__dict__.items():
29 if type(val) == int: _type_reprs[val] = name
30 return _type_reprs.setdefault(type_num, type_num)
31
Martin v. Löwisef04c442008-03-19 05:04:44 +000032class Base(object):
33
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000034 """
35 Abstract base class for Node and Leaf.
Martin v. Löwisef04c442008-03-19 05:04:44 +000036
37 This provides some default functionality and boilerplate using the
38 template pattern.
39
40 A node may be a subnode of at most one parent.
41 """
42
43 # Default values for instance variables
44 type = None # int: token number (< 256) or symbol number (>= 256)
45 parent = None # Parent node pointer, or None
46 children = () # Tuple of subnodes
47 was_changed = False
Benjamin Petersonb0871ca2010-10-14 23:03:32 +000048 was_checked = False
Martin v. Löwisef04c442008-03-19 05:04:44 +000049
50 def __new__(cls, *args, **kwds):
51 """Constructor that prevents Base from being instantiated."""
52 assert cls is not Base, "Cannot instantiate Base"
53 return object.__new__(cls)
54
55 def __eq__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000056 """
57 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000058
59 This calls the method _eq().
60 """
61 if self.__class__ is not other.__class__:
62 return NotImplemented
63 return self._eq(other)
64
Benjamin Petersone80b51f2009-11-02 18:33:36 +000065 __hash__ = None # For Py3 compatibility.
66
Martin v. Löwisef04c442008-03-19 05:04:44 +000067 def __ne__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000068 """
69 Compare two nodes for inequality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000070
71 This calls the method _eq().
72 """
73 if self.__class__ is not other.__class__:
74 return NotImplemented
75 return not self._eq(other)
76
77 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000078 """
79 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000080
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000081 This is called by __eq__ and __ne__. It is only called if the two nodes
82 have the same type. This must be implemented by the concrete subclass.
83 Nodes should be considered equal if they have the same structure,
84 ignoring the prefix string and other context information.
Martin v. Löwisef04c442008-03-19 05:04:44 +000085 """
86 raise NotImplementedError
87
88 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000089 """
90 Return a cloned (deep) copy of self.
Martin v. Löwisef04c442008-03-19 05:04:44 +000091
92 This must be implemented by the concrete subclass.
93 """
94 raise NotImplementedError
95
96 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000097 """
98 Return a post-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +000099
100 This must be implemented by the concrete subclass.
101 """
102 raise NotImplementedError
103
104 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000105 """
106 Return a pre-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000107
108 This must be implemented by the concrete subclass.
109 """
110 raise NotImplementedError
111
Martin v. Löwisef04c442008-03-19 05:04:44 +0000112 def replace(self, new):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000113 """Replace this node with a new one in the parent."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000114 assert self.parent is not None, str(self)
115 assert new is not None
116 if not isinstance(new, list):
117 new = [new]
118 l_children = []
119 found = False
120 for ch in self.parent.children:
121 if ch is self:
122 assert not found, (self.parent.children, self, new)
123 if new is not None:
124 l_children.extend(new)
125 found = True
126 else:
127 l_children.append(ch)
128 assert found, (self.children, self, new)
129 self.parent.changed()
130 self.parent.children = l_children
131 for x in new:
132 x.parent = self.parent
133 self.parent = None
134
135 def get_lineno(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000136 """Return the line number which generated the invocant node."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000137 node = self
138 while not isinstance(node, Leaf):
139 if not node.children:
140 return
141 node = node.children[0]
142 return node.lineno
143
144 def changed(self):
145 if self.parent:
146 self.parent.changed()
147 self.was_changed = True
148
149 def remove(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000150 """
151 Remove the node from the tree. Returns the position of the node in its
152 parent's children before it was removed.
153 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000154 if self.parent:
155 for i, node in enumerate(self.parent.children):
156 if node is self:
157 self.parent.changed()
158 del self.parent.children[i]
159 self.parent = None
160 return i
161
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000162 @property
163 def next_sibling(self):
164 """
165 The node immediately following the invocant in their parent's children
166 list. If the invocant does not have a next sibling, it is None
167 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000168 if self.parent is None:
169 return None
170
171 # Can't use index(); we need to test by identity
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000172 for i, child in enumerate(self.parent.children):
173 if child is self:
Martin v. Löwisef04c442008-03-19 05:04:44 +0000174 try:
175 return self.parent.children[i+1]
176 except IndexError:
177 return None
178
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000179 @property
180 def prev_sibling(self):
181 """
182 The node immediately preceding the invocant in their parent's children
183 list. If the invocant does not have a previous sibling, it is None.
184 """
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000185 if self.parent is None:
186 return None
187
188 # Can't use index(); we need to test by identity
189 for i, child in enumerate(self.parent.children):
190 if child is self:
191 if i == 0:
192 return None
193 return self.parent.children[i-1]
194
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000195 def leaves(self):
196 for child in self.children:
Andrew Svetlov7d140152012-10-06 17:11:45 +0300197 yield from child.leaves()
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000198
199 def depth(self):
200 if self.parent is None:
201 return 0
202 return 1 + self.parent.depth()
203
Martin v. Löwisef04c442008-03-19 05:04:44 +0000204 def get_suffix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000205 """
206 Return the string immediately following the invocant node. This is
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000207 effectively equivalent to node.next_sibling.prefix
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000208 """
209 next_sib = self.next_sibling
Martin v. Löwisef04c442008-03-19 05:04:44 +0000210 if next_sib is None:
211 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000212 return next_sib.prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000213
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000214 if sys.version_info < (3, 0):
215 def __str__(self):
216 return str(self).encode("ascii")
217
Martin v. Löwisef04c442008-03-19 05:04:44 +0000218class Node(Base):
219
220 """Concrete implementation for interior nodes."""
221
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000222 def __init__(self,type, children,
223 context=None,
224 prefix=None,
225 fixers_applied=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000226 """
227 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000228
229 Takes a type constant (a symbol number >= 256), a sequence of
230 child nodes, and an optional context keyword argument.
231
232 As a side effect, the parent pointers of the children are updated.
233 """
234 assert type >= 256, type
235 self.type = type
236 self.children = list(children)
237 for ch in self.children:
238 assert ch.parent is None, repr(ch)
239 ch.parent = self
240 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000241 self.prefix = prefix
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000242 if fixers_applied:
243 self.fixers_applied = fixers_applied[:]
244 else:
245 self.fixers_applied = None
Martin v. Löwisef04c442008-03-19 05:04:44 +0000246
247 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000248 """Return a canonical string representation."""
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000249 return "%s(%s, %r)" % (self.__class__.__name__,
250 type_repr(self.type),
Martin v. Löwisef04c442008-03-19 05:04:44 +0000251 self.children)
252
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000253 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000254 """
255 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000256
257 This reproduces the input source exactly.
258 """
259 return "".join(map(str, self.children))
260
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000261 if sys.version_info > (3, 0):
262 __str__ = __unicode__
263
Martin v. Löwisef04c442008-03-19 05:04:44 +0000264 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000265 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000266 return (self.type, self.children) == (other.type, other.children)
267
268 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000269 """Return a cloned (deep) copy of self."""
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000270 return Node(self.type, [ch.clone() for ch in self.children],
271 fixers_applied=self.fixers_applied)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000272
273 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000274 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000275 for child in self.children:
Andrew Svetlov7d140152012-10-06 17:11:45 +0300276 yield from child.post_order()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000277 yield self
278
279 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000280 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000281 yield self
282 for child in self.children:
Andrew Svetlov7d140152012-10-06 17:11:45 +0300283 yield from child.pre_order()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000284
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000285 def _prefix_getter(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000286 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000287 The whitespace and comments preceding this node in the input.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000288 """
289 if not self.children:
290 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000291 return self.children[0].prefix
292
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000293 def _prefix_setter(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000294 if self.children:
295 self.children[0].prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000296
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000297 prefix = property(_prefix_getter, _prefix_setter)
298
Martin v. Löwisef04c442008-03-19 05:04:44 +0000299 def set_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000300 """
301 Equivalent to 'node.children[i] = child'. This method also sets the
302 child's parent attribute appropriately.
303 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000304 child.parent = self
305 self.children[i].parent = None
306 self.children[i] = child
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000307 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000308
309 def insert_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000310 """
311 Equivalent to 'node.children.insert(i, child)'. This method also sets
312 the child's parent attribute appropriately.
313 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000314 child.parent = self
315 self.children.insert(i, child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000316 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000317
318 def append_child(self, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000319 """
320 Equivalent to 'node.children.append(child)'. This method also sets the
321 child's parent attribute appropriately.
322 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000323 child.parent = self
324 self.children.append(child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000325 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000326
327
328class Leaf(Base):
329
330 """Concrete implementation for leaf nodes."""
331
332 # Default values for instance variables
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000333 _prefix = "" # Whitespace and comments preceding this token in the input
334 lineno = 0 # Line where this token starts in the input
335 column = 0 # Column where this token tarts in the input
Martin v. Löwisef04c442008-03-19 05:04:44 +0000336
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000337 def __init__(self, type, value,
338 context=None,
339 prefix=None,
340 fixers_applied=[]):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000341 """
342 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000343
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000344 Takes a type constant (a token number < 256), a string value, and an
345 optional context keyword argument.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000346 """
347 assert 0 <= type < 256, type
348 if context is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000349 self._prefix, (self.lineno, self.column) = context
Martin v. Löwisef04c442008-03-19 05:04:44 +0000350 self.type = type
351 self.value = value
352 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000353 self._prefix = prefix
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000354 self.fixers_applied = fixers_applied[:]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000355
356 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000357 """Return a canonical string representation."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000358 return "%s(%r, %r)" % (self.__class__.__name__,
359 self.type,
360 self.value)
361
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000362 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000363 """
364 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000365
366 This reproduces the input source exactly.
367 """
368 return self.prefix + str(self.value)
369
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000370 if sys.version_info > (3, 0):
371 __str__ = __unicode__
372
Martin v. Löwisef04c442008-03-19 05:04:44 +0000373 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000374 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000375 return (self.type, self.value) == (other.type, other.value)
376
377 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000378 """Return a cloned (deep) copy of self."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000379 return Leaf(self.type, self.value,
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000380 (self.prefix, (self.lineno, self.column)),
381 fixers_applied=self.fixers_applied)
382
383 def leaves(self):
384 yield self
Martin v. Löwisef04c442008-03-19 05:04:44 +0000385
386 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000387 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000388 yield self
389
390 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000391 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000392 yield self
393
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000394 def _prefix_getter(self):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000395 """
396 The whitespace and comments preceding this token in the input.
397 """
398 return self._prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000399
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000400 def _prefix_setter(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000401 self.changed()
402 self._prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000403
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000404 prefix = property(_prefix_getter, _prefix_setter)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000405
406def convert(gr, raw_node):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000407 """
408 Convert raw node information to a Node or Leaf instance.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000409
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000410 This is passed to the parser driver which calls it whenever a reduction of a
411 grammar rule produces a new complete node, so that the tree is build
412 strictly bottom-up.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000413 """
414 type, value, context, children = raw_node
415 if children or type in gr.number2symbol:
416 # If there's exactly one child, return that child instead of
417 # creating a new node.
418 if len(children) == 1:
419 return children[0]
420 return Node(type, children, context=context)
421 else:
422 return Leaf(type, value, context=context)
423
424
425class BasePattern(object):
426
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000427 """
428 A pattern is a tree matching pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000429
430 It looks for a specific node type (token or symbol), and
431 optionally for a specific content.
432
433 This is an abstract base class. There are three concrete
434 subclasses:
435
436 - LeafPattern matches a single leaf node;
437 - NodePattern matches a single node (usually non-leaf);
438 - WildcardPattern matches a sequence of nodes of variable length.
439 """
440
441 # Defaults for instance variables
442 type = None # Node type (token if < 256, symbol if >= 256)
443 content = None # Optional content matching pattern
444 name = None # Optional name used to store match in results dict
445
446 def __new__(cls, *args, **kwds):
447 """Constructor that prevents BasePattern from being instantiated."""
448 assert cls is not BasePattern, "Cannot instantiate BasePattern"
449 return object.__new__(cls)
450
451 def __repr__(self):
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000452 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000453 while args and args[-1] is None:
454 del args[-1]
455 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
456
457 def optimize(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000458 """
459 A subclass can define this as a hook for optimizations.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000460
461 Returns either self or another node with the same effect.
462 """
463 return self
464
465 def match(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000466 """
467 Does this pattern exactly match a node?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000468
469 Returns True if it matches, False if not.
470
471 If results is not None, it must be a dict which will be
472 updated with the nodes matching named subpatterns.
473
474 Default implementation for non-wildcard patterns.
475 """
476 if self.type is not None and node.type != self.type:
477 return False
478 if self.content is not None:
479 r = None
480 if results is not None:
481 r = {}
482 if not self._submatch(node, r):
483 return False
484 if r:
485 results.update(r)
486 if results is not None and self.name:
487 results[self.name] = node
488 return True
489
490 def match_seq(self, nodes, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000491 """
492 Does this pattern exactly match a sequence of nodes?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000493
494 Default implementation for non-wildcard patterns.
495 """
496 if len(nodes) != 1:
497 return False
498 return self.match(nodes[0], results)
499
500 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000501 """
502 Generator yielding all matches for this pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000503
504 Default implementation for non-wildcard patterns.
505 """
506 r = {}
507 if nodes and self.match(nodes[0], r):
508 yield 1, r
509
510
511class LeafPattern(BasePattern):
512
513 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000514 """
515 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000516
517 The type, if given must be a token type (< 256). If not given,
518 this matches any *leaf* node; the content may still be required.
519
520 The content, if given, must be a string.
521
522 If a name is given, the matching node is stored in the results
523 dict under that key.
524 """
525 if type is not None:
526 assert 0 <= type < 256, type
527 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000528 assert isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000529 self.type = type
530 self.content = content
531 self.name = name
532
533 def match(self, node, results=None):
534 """Override match() to insist on a leaf node."""
535 if not isinstance(node, Leaf):
536 return False
537 return BasePattern.match(self, node, results)
538
539 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000540 """
541 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000542
543 This assumes the node type matches and self.content is not None.
544
545 Returns True if it matches, False if not.
546
547 If results is not None, it must be a dict which will be
548 updated with the nodes matching named subpatterns.
549
550 When returning False, the results dict may still be updated.
551 """
552 return self.content == node.value
553
554
555class NodePattern(BasePattern):
556
557 wildcards = False
558
559 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000560 """
561 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000562
563 The type, if given, must be a symbol type (>= 256). If the
564 type is None this matches *any* single node (leaf or not),
565 except if content is not None, in which it only matches
566 non-leaf nodes that also match the content pattern.
567
568 The content, if not None, must be a sequence of Patterns that
569 must match the node's children exactly. If the content is
570 given, the type must not be None.
571
572 If a name is given, the matching node is stored in the results
573 dict under that key.
574 """
575 if type is not None:
576 assert type >= 256, type
577 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000578 assert not isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000579 content = list(content)
580 for i, item in enumerate(content):
581 assert isinstance(item, BasePattern), (i, item)
582 if isinstance(item, WildcardPattern):
583 self.wildcards = True
584 self.type = type
585 self.content = content
586 self.name = name
587
588 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000589 """
590 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000591
592 This assumes the node type matches and self.content is not None.
593
594 Returns True if it matches, False if not.
595
596 If results is not None, it must be a dict which will be
597 updated with the nodes matching named subpatterns.
598
599 When returning False, the results dict may still be updated.
600 """
601 if self.wildcards:
602 for c, r in generate_matches(self.content, node.children):
603 if c == len(node.children):
604 if results is not None:
605 results.update(r)
606 return True
607 return False
608 if len(self.content) != len(node.children):
609 return False
610 for subpattern, child in zip(self.content, node.children):
611 if not subpattern.match(child, results):
612 return False
613 return True
614
615
616class WildcardPattern(BasePattern):
617
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000618 """
619 A wildcard pattern can match zero or more nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000620
621 This has all the flexibility needed to implement patterns like:
622
623 .* .+ .? .{m,n}
624 (a b c | d e | f)
625 (...)* (...)+ (...)? (...){m,n}
626
627 except it always uses non-greedy matching.
628 """
629
630 def __init__(self, content=None, min=0, max=HUGE, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000631 """
632 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000633
634 Args:
635 content: optional sequence of subsequences of patterns;
636 if absent, matches one node;
637 if present, each subsequence is an alternative [*]
Ezio Melotti13925002011-03-16 11:05:33 +0200638 min: optional minimum number of times to match, default 0
639 max: optional maximum number of times to match, default HUGE
Martin v. Löwisef04c442008-03-19 05:04:44 +0000640 name: optional name assigned to this match
641
642 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
643 equivalent to (a b c | d e | f g h); if content is None,
644 this is equivalent to '.' in regular expression terms.
645 The min and max parameters work as follows:
646 min=0, max=maxint: .*
647 min=1, max=maxint: .+
648 min=0, max=1: .?
649 min=1, max=1: .
650 If content is not None, replace the dot with the parenthesized
651 list of alternatives, e.g. (a b c | d e | f g h)*
652 """
653 assert 0 <= min <= max <= HUGE, (min, max)
654 if content is not None:
655 content = tuple(map(tuple, content)) # Protect against alterations
656 # Check sanity of alternatives
657 assert len(content), repr(content) # Can't have zero alternatives
658 for alt in content:
659 assert len(alt), repr(alt) # Can have empty alternatives
660 self.content = content
661 self.min = min
662 self.max = max
663 self.name = name
664
665 def optimize(self):
666 """Optimize certain stacked wildcard patterns."""
667 subpattern = None
668 if (self.content is not None and
669 len(self.content) == 1 and len(self.content[0]) == 1):
670 subpattern = self.content[0][0]
671 if self.min == 1 and self.max == 1:
672 if self.content is None:
673 return NodePattern(name=self.name)
674 if subpattern is not None and self.name == subpattern.name:
675 return subpattern.optimize()
676 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
677 subpattern.min <= 1 and self.name == subpattern.name):
678 return WildcardPattern(subpattern.content,
679 self.min*subpattern.min,
680 self.max*subpattern.max,
681 subpattern.name)
682 return self
683
684 def match(self, node, results=None):
685 """Does this pattern exactly match a node?"""
686 return self.match_seq([node], results)
687
688 def match_seq(self, nodes, results=None):
689 """Does this pattern exactly match a sequence of nodes?"""
690 for c, r in self.generate_matches(nodes):
691 if c == len(nodes):
692 if results is not None:
693 results.update(r)
694 if self.name:
695 results[self.name] = list(nodes)
696 return True
697 return False
698
699 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000700 """
701 Generator yielding matches for a sequence of nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000702
703 Args:
704 nodes: sequence of nodes
705
706 Yields:
707 (count, results) tuples where:
708 count: the match comprises nodes[:count];
709 results: dict containing named submatches.
710 """
711 if self.content is None:
712 # Shortcut for special case (see __init__.__doc__)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000713 for count in range(self.min, 1 + min(len(nodes), self.max)):
Martin v. Löwisef04c442008-03-19 05:04:44 +0000714 r = {}
715 if self.name:
716 r[self.name] = nodes[:count]
717 yield count, r
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000718 elif self.name == "bare_name":
719 yield self._bare_name_matches(nodes)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000720 else:
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000721 # The reason for this is that hitting the recursion limit usually
722 # results in some ugly messages about how RuntimeErrors are being
Benjamin Petersond86e4c82011-03-06 17:14:30 -0600723 # ignored. We only have to do this on CPython, though, because other
724 # implementations don't have this nasty bug in the first place.
725 if hasattr(sys, "getrefcount"):
726 save_stderr = sys.stderr
727 sys.stderr = StringIO()
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000728 try:
729 for count, r in self._recursive_matches(nodes, 0):
730 if self.name:
731 r[self.name] = nodes[:count]
732 yield count, r
733 except RuntimeError:
734 # We fall back to the iterative pattern matching scheme if the recursive
735 # scheme hits the recursion limit.
736 for count, r in self._iterative_matches(nodes):
737 if self.name:
738 r[self.name] = nodes[:count]
739 yield count, r
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000740 finally:
Benjamin Petersond86e4c82011-03-06 17:14:30 -0600741 if hasattr(sys, "getrefcount"):
742 sys.stderr = save_stderr
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000743
744 def _iterative_matches(self, nodes):
745 """Helper to iteratively yield the matches."""
746 nodelen = len(nodes)
747 if 0 >= self.min:
748 yield 0, {}
749
750 results = []
751 # generate matches that use just one alt from self.content
752 for alt in self.content:
753 for c, r in generate_matches(alt, nodes):
754 yield c, r
755 results.append((c, r))
756
757 # for each match, iterate down the nodes
758 while results:
759 new_results = []
760 for c0, r0 in results:
761 # stop if the entire set of nodes has been matched
762 if c0 < nodelen and c0 <= self.max:
763 for alt in self.content:
764 for c1, r1 in generate_matches(alt, nodes[c0:]):
765 if c1 > 0:
766 r = {}
767 r.update(r0)
768 r.update(r1)
769 yield c0 + c1, r
770 new_results.append((c0 + c1, r))
771 results = new_results
Martin v. Löwisef04c442008-03-19 05:04:44 +0000772
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000773 def _bare_name_matches(self, nodes):
774 """Special optimized matcher for bare_name."""
775 count = 0
776 r = {}
777 done = False
778 max = len(nodes)
779 while not done and count < max:
780 done = True
781 for leaf in self.content:
782 if leaf[0].match(nodes[count], r):
783 count += 1
784 done = False
785 break
786 r[self.name] = nodes[:count]
787 return count, r
788
Martin v. Löwisef04c442008-03-19 05:04:44 +0000789 def _recursive_matches(self, nodes, count):
790 """Helper to recursively yield the matches."""
791 assert self.content is not None
792 if count >= self.min:
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000793 yield 0, {}
Martin v. Löwisef04c442008-03-19 05:04:44 +0000794 if count < self.max:
795 for alt in self.content:
796 for c0, r0 in generate_matches(alt, nodes):
797 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
798 r = {}
799 r.update(r0)
800 r.update(r1)
801 yield c0 + c1, r
802
803
804class NegatedPattern(BasePattern):
805
806 def __init__(self, content=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000807 """
808 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000809
810 The argument is either a pattern or None. If it is None, this
811 only matches an empty sequence (effectively '$' in regex
812 lingo). If it is not None, this matches whenever the argument
813 pattern doesn't have any matches.
814 """
815 if content is not None:
816 assert isinstance(content, BasePattern), repr(content)
817 self.content = content
818
819 def match(self, node):
820 # We never match a node in its entirety
821 return False
822
823 def match_seq(self, nodes):
824 # We only match an empty sequence of nodes in its entirety
825 return len(nodes) == 0
826
827 def generate_matches(self, nodes):
828 if self.content is None:
829 # Return a match if there is an empty sequence
830 if len(nodes) == 0:
831 yield 0, {}
832 else:
833 # Return a match if the argument pattern has no matches
834 for c, r in self.content.generate_matches(nodes):
835 return
836 yield 0, {}
837
838
839def generate_matches(patterns, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000840 """
841 Generator yielding matches for a sequence of patterns and nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000842
843 Args:
844 patterns: a sequence of patterns
845 nodes: a sequence of nodes
846
847 Yields:
848 (count, results) tuples where:
849 count: the entire sequence of patterns matches nodes[:count];
850 results: dict containing named submatches.
851 """
852 if not patterns:
853 yield 0, {}
854 else:
855 p, rest = patterns[0], patterns[1:]
856 for c0, r0 in p.generate_matches(nodes):
857 if not rest:
858 yield c0, r0
859 else:
860 for c1, r1 in generate_matches(rest, nodes[c0:]):
861 r = {}
862 r.update(r0)
863 r.update(r1)
864 yield c0 + c1, r