blob: 17cbf0a2f916b6cc9a233a09fd1d12342f33a13d [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
Benjamin Peterson608d8bc2009-05-05 23:23:31 +00004"""
5Python parse tree definitions.
Martin v. Löwisef04c442008-03-19 05:04:44 +00006
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
Benjamin Peterson5cff9312008-11-28 23:01:28 +000015import sys
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +000016import warnings
Benjamin Peterson5cff9312008-11-28 23:01:28 +000017from io import StringIO
18
Martin v. Löwisef04c442008-03-19 05:04:44 +000019HUGE = 0x7FFFFFFF # maximum repeat count, default max
20
Martin v. Löwis3faa84f2008-03-22 00:07:09 +000021_type_reprs = {}
22def type_repr(type_num):
23 global _type_reprs
24 if not _type_reprs:
25 from .pygram import python_symbols
26 # printing tokens is possible but not as useful
27 # from .pgen2 import token // token.__dict__.items():
28 for name, val in python_symbols.__dict__.items():
29 if type(val) == int: _type_reprs[val] = name
30 return _type_reprs.setdefault(type_num, type_num)
31
Martin v. Löwisef04c442008-03-19 05:04:44 +000032class Base(object):
33
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000034 """
35 Abstract base class for Node and Leaf.
Martin v. Löwisef04c442008-03-19 05:04:44 +000036
37 This provides some default functionality and boilerplate using the
38 template pattern.
39
40 A node may be a subnode of at most one parent.
41 """
42
43 # Default values for instance variables
44 type = None # int: token number (< 256) or symbol number (>= 256)
45 parent = None # Parent node pointer, or None
46 children = () # Tuple of subnodes
47 was_changed = False
Benjamin Petersonb0871ca2010-10-14 23:03:32 +000048 was_checked = False
Martin v. Löwisef04c442008-03-19 05:04:44 +000049
50 def __new__(cls, *args, **kwds):
51 """Constructor that prevents Base from being instantiated."""
52 assert cls is not Base, "Cannot instantiate Base"
53 return object.__new__(cls)
54
55 def __eq__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000056 """
57 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000058
59 This calls the method _eq().
60 """
61 if self.__class__ is not other.__class__:
62 return NotImplemented
63 return self._eq(other)
64
Benjamin Petersone80b51f2009-11-02 18:33:36 +000065 __hash__ = None # For Py3 compatibility.
66
Martin v. Löwisef04c442008-03-19 05:04:44 +000067 def __ne__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000068 """
69 Compare two nodes for inequality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000070
71 This calls the method _eq().
72 """
73 if self.__class__ is not other.__class__:
74 return NotImplemented
75 return not self._eq(other)
76
77 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000078 """
79 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000080
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000081 This is called by __eq__ and __ne__. It is only called if the two nodes
82 have the same type. This must be implemented by the concrete subclass.
83 Nodes should be considered equal if they have the same structure,
84 ignoring the prefix string and other context information.
Martin v. Löwisef04c442008-03-19 05:04:44 +000085 """
86 raise NotImplementedError
87
88 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000089 """
90 Return a cloned (deep) copy of self.
Martin v. Löwisef04c442008-03-19 05:04:44 +000091
92 This must be implemented by the concrete subclass.
93 """
94 raise NotImplementedError
95
96 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000097 """
98 Return a post-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +000099
100 This must be implemented by the concrete subclass.
101 """
102 raise NotImplementedError
103
104 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000105 """
106 Return a pre-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000107
108 This must be implemented by the concrete subclass.
109 """
110 raise NotImplementedError
111
Martin v. Löwisef04c442008-03-19 05:04:44 +0000112 def replace(self, new):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000113 """Replace this node with a new one in the parent."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000114 assert self.parent is not None, str(self)
115 assert new is not None
116 if not isinstance(new, list):
117 new = [new]
118 l_children = []
119 found = False
120 for ch in self.parent.children:
121 if ch is self:
122 assert not found, (self.parent.children, self, new)
123 if new is not None:
124 l_children.extend(new)
125 found = True
126 else:
127 l_children.append(ch)
128 assert found, (self.children, self, new)
129 self.parent.changed()
130 self.parent.children = l_children
131 for x in new:
132 x.parent = self.parent
133 self.parent = None
134
135 def get_lineno(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000136 """Return the line number which generated the invocant node."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000137 node = self
138 while not isinstance(node, Leaf):
139 if not node.children:
140 return
141 node = node.children[0]
142 return node.lineno
143
144 def changed(self):
145 if self.parent:
146 self.parent.changed()
147 self.was_changed = True
148
149 def remove(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000150 """
151 Remove the node from the tree. Returns the position of the node in its
152 parent's children before it was removed.
153 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000154 if self.parent:
155 for i, node in enumerate(self.parent.children):
156 if node is self:
157 self.parent.changed()
158 del self.parent.children[i]
159 self.parent = None
160 return i
161
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000162 @property
163 def next_sibling(self):
164 """
165 The node immediately following the invocant in their parent's children
166 list. If the invocant does not have a next sibling, it is None
167 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000168 if self.parent is None:
169 return None
170
171 # Can't use index(); we need to test by identity
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000172 for i, child in enumerate(self.parent.children):
173 if child is self:
Martin v. Löwisef04c442008-03-19 05:04:44 +0000174 try:
175 return self.parent.children[i+1]
176 except IndexError:
177 return None
178
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000179 @property
180 def prev_sibling(self):
181 """
182 The node immediately preceding the invocant in their parent's children
183 list. If the invocant does not have a previous sibling, it is None.
184 """
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000185 if self.parent is None:
186 return None
187
188 # Can't use index(); we need to test by identity
189 for i, child in enumerate(self.parent.children):
190 if child is self:
191 if i == 0:
192 return None
193 return self.parent.children[i-1]
194
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000195 def leaves(self):
196 for child in self.children:
197 for x in child.leaves():
198 yield x
199
200 def depth(self):
201 if self.parent is None:
202 return 0
203 return 1 + self.parent.depth()
204
Martin v. Löwisef04c442008-03-19 05:04:44 +0000205 def get_suffix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000206 """
207 Return the string immediately following the invocant node. This is
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000208 effectively equivalent to node.next_sibling.prefix
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000209 """
210 next_sib = self.next_sibling
Martin v. Löwisef04c442008-03-19 05:04:44 +0000211 if next_sib is None:
212 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000213 return next_sib.prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000214
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000215 if sys.version_info < (3, 0):
216 def __str__(self):
217 return str(self).encode("ascii")
218
Martin v. Löwisef04c442008-03-19 05:04:44 +0000219class Node(Base):
220
221 """Concrete implementation for interior nodes."""
222
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000223 def __init__(self,type, children,
224 context=None,
225 prefix=None,
226 fixers_applied=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000227 """
228 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000229
230 Takes a type constant (a symbol number >= 256), a sequence of
231 child nodes, and an optional context keyword argument.
232
233 As a side effect, the parent pointers of the children are updated.
234 """
235 assert type >= 256, type
236 self.type = type
237 self.children = list(children)
238 for ch in self.children:
239 assert ch.parent is None, repr(ch)
240 ch.parent = self
241 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000242 self.prefix = prefix
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000243 if fixers_applied:
244 self.fixers_applied = fixers_applied[:]
245 else:
246 self.fixers_applied = None
Martin v. Löwisef04c442008-03-19 05:04:44 +0000247
248 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000249 """Return a canonical string representation."""
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000250 return "%s(%s, %r)" % (self.__class__.__name__,
251 type_repr(self.type),
Martin v. Löwisef04c442008-03-19 05:04:44 +0000252 self.children)
253
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000254 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000255 """
256 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000257
258 This reproduces the input source exactly.
259 """
260 return "".join(map(str, self.children))
261
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000262 if sys.version_info > (3, 0):
263 __str__ = __unicode__
264
Martin v. Löwisef04c442008-03-19 05:04:44 +0000265 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000266 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000267 return (self.type, self.children) == (other.type, other.children)
268
269 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000270 """Return a cloned (deep) copy of self."""
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000271 return Node(self.type, [ch.clone() for ch in self.children],
272 fixers_applied=self.fixers_applied)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000273
274 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000275 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000276 for child in self.children:
277 for node in child.post_order():
278 yield node
279 yield self
280
281 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000282 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000283 yield self
284 for child in self.children:
Benjamin Peterson68c80ed2010-08-08 19:23:25 +0000285 for node in child.pre_order():
Martin v. Löwisef04c442008-03-19 05:04:44 +0000286 yield node
287
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000288 def _prefix_getter(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000289 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000290 The whitespace and comments preceding this node in the input.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000291 """
292 if not self.children:
293 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000294 return self.children[0].prefix
295
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000296 def _prefix_setter(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000297 if self.children:
298 self.children[0].prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000299
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000300 prefix = property(_prefix_getter, _prefix_setter)
301
Martin v. Löwisef04c442008-03-19 05:04:44 +0000302 def set_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000303 """
304 Equivalent to 'node.children[i] = child'. This method also sets the
305 child's parent attribute appropriately.
306 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000307 child.parent = self
308 self.children[i].parent = None
309 self.children[i] = child
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000310 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000311
312 def insert_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000313 """
314 Equivalent to 'node.children.insert(i, child)'. This method also sets
315 the child's parent attribute appropriately.
316 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000317 child.parent = self
318 self.children.insert(i, child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000319 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000320
321 def append_child(self, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000322 """
323 Equivalent to 'node.children.append(child)'. This method also sets the
324 child's parent attribute appropriately.
325 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000326 child.parent = self
327 self.children.append(child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000328 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000329
330
331class Leaf(Base):
332
333 """Concrete implementation for leaf nodes."""
334
335 # Default values for instance variables
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000336 _prefix = "" # Whitespace and comments preceding this token in the input
337 lineno = 0 # Line where this token starts in the input
338 column = 0 # Column where this token tarts in the input
Martin v. Löwisef04c442008-03-19 05:04:44 +0000339
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000340 def __init__(self, type, value,
341 context=None,
342 prefix=None,
343 fixers_applied=[]):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000344 """
345 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000346
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000347 Takes a type constant (a token number < 256), a string value, and an
348 optional context keyword argument.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000349 """
350 assert 0 <= type < 256, type
351 if context is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000352 self._prefix, (self.lineno, self.column) = context
Martin v. Löwisef04c442008-03-19 05:04:44 +0000353 self.type = type
354 self.value = value
355 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000356 self._prefix = prefix
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000357 self.fixers_applied = fixers_applied[:]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000358
359 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000360 """Return a canonical string representation."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000361 return "%s(%r, %r)" % (self.__class__.__name__,
362 self.type,
363 self.value)
364
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000365 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000366 """
367 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000368
369 This reproduces the input source exactly.
370 """
371 return self.prefix + str(self.value)
372
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000373 if sys.version_info > (3, 0):
374 __str__ = __unicode__
375
Martin v. Löwisef04c442008-03-19 05:04:44 +0000376 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000377 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000378 return (self.type, self.value) == (other.type, other.value)
379
380 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000381 """Return a cloned (deep) copy of self."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000382 return Leaf(self.type, self.value,
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000383 (self.prefix, (self.lineno, self.column)),
384 fixers_applied=self.fixers_applied)
385
386 def leaves(self):
387 yield self
Martin v. Löwisef04c442008-03-19 05:04:44 +0000388
389 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000390 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000391 yield self
392
393 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000394 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000395 yield self
396
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000397 def _prefix_getter(self):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000398 """
399 The whitespace and comments preceding this token in the input.
400 """
401 return self._prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000402
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000403 def _prefix_setter(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000404 self.changed()
405 self._prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000406
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000407 prefix = property(_prefix_getter, _prefix_setter)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000408
409def convert(gr, raw_node):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000410 """
411 Convert raw node information to a Node or Leaf instance.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000412
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000413 This is passed to the parser driver which calls it whenever a reduction of a
414 grammar rule produces a new complete node, so that the tree is build
415 strictly bottom-up.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000416 """
417 type, value, context, children = raw_node
418 if children or type in gr.number2symbol:
419 # If there's exactly one child, return that child instead of
420 # creating a new node.
421 if len(children) == 1:
422 return children[0]
423 return Node(type, children, context=context)
424 else:
425 return Leaf(type, value, context=context)
426
427
428class BasePattern(object):
429
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000430 """
431 A pattern is a tree matching pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000432
433 It looks for a specific node type (token or symbol), and
434 optionally for a specific content.
435
436 This is an abstract base class. There are three concrete
437 subclasses:
438
439 - LeafPattern matches a single leaf node;
440 - NodePattern matches a single node (usually non-leaf);
441 - WildcardPattern matches a sequence of nodes of variable length.
442 """
443
444 # Defaults for instance variables
445 type = None # Node type (token if < 256, symbol if >= 256)
446 content = None # Optional content matching pattern
447 name = None # Optional name used to store match in results dict
448
449 def __new__(cls, *args, **kwds):
450 """Constructor that prevents BasePattern from being instantiated."""
451 assert cls is not BasePattern, "Cannot instantiate BasePattern"
452 return object.__new__(cls)
453
454 def __repr__(self):
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000455 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000456 while args and args[-1] is None:
457 del args[-1]
458 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
459
460 def optimize(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000461 """
462 A subclass can define this as a hook for optimizations.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000463
464 Returns either self or another node with the same effect.
465 """
466 return self
467
468 def match(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000469 """
470 Does this pattern exactly match a node?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000471
472 Returns True if it matches, False if not.
473
474 If results is not None, it must be a dict which will be
475 updated with the nodes matching named subpatterns.
476
477 Default implementation for non-wildcard patterns.
478 """
479 if self.type is not None and node.type != self.type:
480 return False
481 if self.content is not None:
482 r = None
483 if results is not None:
484 r = {}
485 if not self._submatch(node, r):
486 return False
487 if r:
488 results.update(r)
489 if results is not None and self.name:
490 results[self.name] = node
491 return True
492
493 def match_seq(self, nodes, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000494 """
495 Does this pattern exactly match a sequence of nodes?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000496
497 Default implementation for non-wildcard patterns.
498 """
499 if len(nodes) != 1:
500 return False
501 return self.match(nodes[0], results)
502
503 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000504 """
505 Generator yielding all matches for this pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000506
507 Default implementation for non-wildcard patterns.
508 """
509 r = {}
510 if nodes and self.match(nodes[0], r):
511 yield 1, r
512
513
514class LeafPattern(BasePattern):
515
516 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000517 """
518 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000519
520 The type, if given must be a token type (< 256). If not given,
521 this matches any *leaf* node; the content may still be required.
522
523 The content, if given, must be a string.
524
525 If a name is given, the matching node is stored in the results
526 dict under that key.
527 """
528 if type is not None:
529 assert 0 <= type < 256, type
530 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000531 assert isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000532 self.type = type
533 self.content = content
534 self.name = name
535
536 def match(self, node, results=None):
537 """Override match() to insist on a leaf node."""
538 if not isinstance(node, Leaf):
539 return False
540 return BasePattern.match(self, node, results)
541
542 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000543 """
544 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000545
546 This assumes the node type matches and self.content is not None.
547
548 Returns True if it matches, False if not.
549
550 If results is not None, it must be a dict which will be
551 updated with the nodes matching named subpatterns.
552
553 When returning False, the results dict may still be updated.
554 """
555 return self.content == node.value
556
557
558class NodePattern(BasePattern):
559
560 wildcards = False
561
562 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000563 """
564 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000565
566 The type, if given, must be a symbol type (>= 256). If the
567 type is None this matches *any* single node (leaf or not),
568 except if content is not None, in which it only matches
569 non-leaf nodes that also match the content pattern.
570
571 The content, if not None, must be a sequence of Patterns that
572 must match the node's children exactly. If the content is
573 given, the type must not be None.
574
575 If a name is given, the matching node is stored in the results
576 dict under that key.
577 """
578 if type is not None:
579 assert type >= 256, type
580 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000581 assert not isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000582 content = list(content)
583 for i, item in enumerate(content):
584 assert isinstance(item, BasePattern), (i, item)
585 if isinstance(item, WildcardPattern):
586 self.wildcards = True
587 self.type = type
588 self.content = content
589 self.name = name
590
591 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000592 """
593 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000594
595 This assumes the node type matches and self.content is not None.
596
597 Returns True if it matches, False if not.
598
599 If results is not None, it must be a dict which will be
600 updated with the nodes matching named subpatterns.
601
602 When returning False, the results dict may still be updated.
603 """
604 if self.wildcards:
605 for c, r in generate_matches(self.content, node.children):
606 if c == len(node.children):
607 if results is not None:
608 results.update(r)
609 return True
610 return False
611 if len(self.content) != len(node.children):
612 return False
613 for subpattern, child in zip(self.content, node.children):
614 if not subpattern.match(child, results):
615 return False
616 return True
617
618
619class WildcardPattern(BasePattern):
620
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000621 """
622 A wildcard pattern can match zero or more nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000623
624 This has all the flexibility needed to implement patterns like:
625
626 .* .+ .? .{m,n}
627 (a b c | d e | f)
628 (...)* (...)+ (...)? (...){m,n}
629
630 except it always uses non-greedy matching.
631 """
632
633 def __init__(self, content=None, min=0, max=HUGE, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000634 """
635 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000636
637 Args:
638 content: optional sequence of subsequences of patterns;
639 if absent, matches one node;
640 if present, each subsequence is an alternative [*]
Ezio Melotti13925002011-03-16 11:05:33 +0200641 min: optional minimum number of times to match, default 0
642 max: optional maximum number of times to match, default HUGE
Martin v. Löwisef04c442008-03-19 05:04:44 +0000643 name: optional name assigned to this match
644
645 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
646 equivalent to (a b c | d e | f g h); if content is None,
647 this is equivalent to '.' in regular expression terms.
648 The min and max parameters work as follows:
649 min=0, max=maxint: .*
650 min=1, max=maxint: .+
651 min=0, max=1: .?
652 min=1, max=1: .
653 If content is not None, replace the dot with the parenthesized
654 list of alternatives, e.g. (a b c | d e | f g h)*
655 """
656 assert 0 <= min <= max <= HUGE, (min, max)
657 if content is not None:
658 content = tuple(map(tuple, content)) # Protect against alterations
659 # Check sanity of alternatives
660 assert len(content), repr(content) # Can't have zero alternatives
661 for alt in content:
662 assert len(alt), repr(alt) # Can have empty alternatives
663 self.content = content
664 self.min = min
665 self.max = max
666 self.name = name
667
668 def optimize(self):
669 """Optimize certain stacked wildcard patterns."""
670 subpattern = None
671 if (self.content is not None and
672 len(self.content) == 1 and len(self.content[0]) == 1):
673 subpattern = self.content[0][0]
674 if self.min == 1 and self.max == 1:
675 if self.content is None:
676 return NodePattern(name=self.name)
677 if subpattern is not None and self.name == subpattern.name:
678 return subpattern.optimize()
679 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
680 subpattern.min <= 1 and self.name == subpattern.name):
681 return WildcardPattern(subpattern.content,
682 self.min*subpattern.min,
683 self.max*subpattern.max,
684 subpattern.name)
685 return self
686
687 def match(self, node, results=None):
688 """Does this pattern exactly match a node?"""
689 return self.match_seq([node], results)
690
691 def match_seq(self, nodes, results=None):
692 """Does this pattern exactly match a sequence of nodes?"""
693 for c, r in self.generate_matches(nodes):
694 if c == len(nodes):
695 if results is not None:
696 results.update(r)
697 if self.name:
698 results[self.name] = list(nodes)
699 return True
700 return False
701
702 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000703 """
704 Generator yielding matches for a sequence of nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000705
706 Args:
707 nodes: sequence of nodes
708
709 Yields:
710 (count, results) tuples where:
711 count: the match comprises nodes[:count];
712 results: dict containing named submatches.
713 """
714 if self.content is None:
715 # Shortcut for special case (see __init__.__doc__)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000716 for count in range(self.min, 1 + min(len(nodes), self.max)):
Martin v. Löwisef04c442008-03-19 05:04:44 +0000717 r = {}
718 if self.name:
719 r[self.name] = nodes[:count]
720 yield count, r
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000721 elif self.name == "bare_name":
722 yield self._bare_name_matches(nodes)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000723 else:
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000724 # The reason for this is that hitting the recursion limit usually
725 # results in some ugly messages about how RuntimeErrors are being
Benjamin Petersond86e4c82011-03-06 17:14:30 -0600726 # ignored. We only have to do this on CPython, though, because other
727 # implementations don't have this nasty bug in the first place.
728 if hasattr(sys, "getrefcount"):
729 save_stderr = sys.stderr
730 sys.stderr = StringIO()
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000731 try:
732 for count, r in self._recursive_matches(nodes, 0):
733 if self.name:
734 r[self.name] = nodes[:count]
735 yield count, r
736 except RuntimeError:
737 # We fall back to the iterative pattern matching scheme if the recursive
738 # scheme hits the recursion limit.
739 for count, r in self._iterative_matches(nodes):
740 if self.name:
741 r[self.name] = nodes[:count]
742 yield count, r
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000743 finally:
Benjamin Petersond86e4c82011-03-06 17:14:30 -0600744 if hasattr(sys, "getrefcount"):
745 sys.stderr = save_stderr
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000746
747 def _iterative_matches(self, nodes):
748 """Helper to iteratively yield the matches."""
749 nodelen = len(nodes)
750 if 0 >= self.min:
751 yield 0, {}
752
753 results = []
754 # generate matches that use just one alt from self.content
755 for alt in self.content:
756 for c, r in generate_matches(alt, nodes):
757 yield c, r
758 results.append((c, r))
759
760 # for each match, iterate down the nodes
761 while results:
762 new_results = []
763 for c0, r0 in results:
764 # stop if the entire set of nodes has been matched
765 if c0 < nodelen and c0 <= self.max:
766 for alt in self.content:
767 for c1, r1 in generate_matches(alt, nodes[c0:]):
768 if c1 > 0:
769 r = {}
770 r.update(r0)
771 r.update(r1)
772 yield c0 + c1, r
773 new_results.append((c0 + c1, r))
774 results = new_results
Martin v. Löwisef04c442008-03-19 05:04:44 +0000775
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000776 def _bare_name_matches(self, nodes):
777 """Special optimized matcher for bare_name."""
778 count = 0
779 r = {}
780 done = False
781 max = len(nodes)
782 while not done and count < max:
783 done = True
784 for leaf in self.content:
785 if leaf[0].match(nodes[count], r):
786 count += 1
787 done = False
788 break
789 r[self.name] = nodes[:count]
790 return count, r
791
Martin v. Löwisef04c442008-03-19 05:04:44 +0000792 def _recursive_matches(self, nodes, count):
793 """Helper to recursively yield the matches."""
794 assert self.content is not None
795 if count >= self.min:
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000796 yield 0, {}
Martin v. Löwisef04c442008-03-19 05:04:44 +0000797 if count < self.max:
798 for alt in self.content:
799 for c0, r0 in generate_matches(alt, nodes):
800 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
801 r = {}
802 r.update(r0)
803 r.update(r1)
804 yield c0 + c1, r
805
806
807class NegatedPattern(BasePattern):
808
809 def __init__(self, content=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000810 """
811 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000812
813 The argument is either a pattern or None. If it is None, this
814 only matches an empty sequence (effectively '$' in regex
815 lingo). If it is not None, this matches whenever the argument
816 pattern doesn't have any matches.
817 """
818 if content is not None:
819 assert isinstance(content, BasePattern), repr(content)
820 self.content = content
821
822 def match(self, node):
823 # We never match a node in its entirety
824 return False
825
826 def match_seq(self, nodes):
827 # We only match an empty sequence of nodes in its entirety
828 return len(nodes) == 0
829
830 def generate_matches(self, nodes):
831 if self.content is None:
832 # Return a match if there is an empty sequence
833 if len(nodes) == 0:
834 yield 0, {}
835 else:
836 # Return a match if the argument pattern has no matches
837 for c, r in self.content.generate_matches(nodes):
838 return
839 yield 0, {}
840
841
842def generate_matches(patterns, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000843 """
844 Generator yielding matches for a sequence of patterns and nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000845
846 Args:
847 patterns: a sequence of patterns
848 nodes: a sequence of nodes
849
850 Yields:
851 (count, results) tuples where:
852 count: the entire sequence of patterns matches nodes[:count];
853 results: dict containing named submatches.
854 """
855 if not patterns:
856 yield 0, {}
857 else:
858 p, rest = patterns[0], patterns[1:]
859 for c0, r0 in p.generate_matches(nodes):
860 if not rest:
861 yield c0, r0
862 else:
863 for c1, r1 in generate_matches(rest, nodes[c0:]):
864 r = {}
865 r.update(r0)
866 r.update(r1)
867 yield c0 + c1, r