blob: 2a6ef2ef5240a2b1aa9eecb5162368d5649b624c [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
Benjamin Peterson608d8bc2009-05-05 23:23:31 +00004"""
5Python parse tree definitions.
Martin v. Löwisef04c442008-03-19 05:04:44 +00006
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
Benjamin Peterson5cff9312008-11-28 23:01:28 +000015import sys
16from io import StringIO
17
Martin v. Löwisef04c442008-03-19 05:04:44 +000018HUGE = 0x7FFFFFFF # maximum repeat count, default max
19
Martin v. Löwis3faa84f2008-03-22 00:07:09 +000020_type_reprs = {}
21def type_repr(type_num):
22 global _type_reprs
23 if not _type_reprs:
24 from .pygram import python_symbols
25 # printing tokens is possible but not as useful
26 # from .pgen2 import token // token.__dict__.items():
27 for name, val in python_symbols.__dict__.items():
28 if type(val) == int: _type_reprs[val] = name
29 return _type_reprs.setdefault(type_num, type_num)
30
Martin v. Löwisef04c442008-03-19 05:04:44 +000031class Base(object):
32
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000033 """
34 Abstract base class for Node and Leaf.
Martin v. Löwisef04c442008-03-19 05:04:44 +000035
36 This provides some default functionality and boilerplate using the
37 template pattern.
38
39 A node may be a subnode of at most one parent.
40 """
41
42 # Default values for instance variables
43 type = None # int: token number (< 256) or symbol number (>= 256)
44 parent = None # Parent node pointer, or None
45 children = () # Tuple of subnodes
46 was_changed = False
Benjamin Petersonb0871ca2010-10-14 23:03:32 +000047 was_checked = False
Martin v. Löwisef04c442008-03-19 05:04:44 +000048
49 def __new__(cls, *args, **kwds):
50 """Constructor that prevents Base from being instantiated."""
51 assert cls is not Base, "Cannot instantiate Base"
52 return object.__new__(cls)
53
54 def __eq__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000055 """
56 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000057
58 This calls the method _eq().
59 """
60 if self.__class__ is not other.__class__:
61 return NotImplemented
62 return self._eq(other)
63
Benjamin Petersone80b51f2009-11-02 18:33:36 +000064 __hash__ = None # For Py3 compatibility.
65
Martin v. Löwisef04c442008-03-19 05:04:44 +000066 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000067 """
68 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000069
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000070 This is called by __eq__ and __ne__. It is only called if the two nodes
71 have the same type. This must be implemented by the concrete subclass.
72 Nodes should be considered equal if they have the same structure,
73 ignoring the prefix string and other context information.
Martin v. Löwisef04c442008-03-19 05:04:44 +000074 """
75 raise NotImplementedError
76
77 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000078 """
79 Return a cloned (deep) copy of self.
Martin v. Löwisef04c442008-03-19 05:04:44 +000080
81 This must be implemented by the concrete subclass.
82 """
83 raise NotImplementedError
84
85 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000086 """
87 Return a post-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +000088
89 This must be implemented by the concrete subclass.
90 """
91 raise NotImplementedError
92
93 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000094 """
95 Return a pre-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +000096
97 This must be implemented by the concrete subclass.
98 """
99 raise NotImplementedError
100
Martin v. Löwisef04c442008-03-19 05:04:44 +0000101 def replace(self, new):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000102 """Replace this node with a new one in the parent."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000103 assert self.parent is not None, str(self)
104 assert new is not None
105 if not isinstance(new, list):
106 new = [new]
107 l_children = []
108 found = False
109 for ch in self.parent.children:
110 if ch is self:
111 assert not found, (self.parent.children, self, new)
112 if new is not None:
113 l_children.extend(new)
114 found = True
115 else:
116 l_children.append(ch)
117 assert found, (self.children, self, new)
118 self.parent.changed()
119 self.parent.children = l_children
120 for x in new:
121 x.parent = self.parent
122 self.parent = None
123
124 def get_lineno(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000125 """Return the line number which generated the invocant node."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000126 node = self
127 while not isinstance(node, Leaf):
128 if not node.children:
129 return
130 node = node.children[0]
131 return node.lineno
132
133 def changed(self):
134 if self.parent:
135 self.parent.changed()
136 self.was_changed = True
137
138 def remove(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000139 """
140 Remove the node from the tree. Returns the position of the node in its
141 parent's children before it was removed.
142 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000143 if self.parent:
144 for i, node in enumerate(self.parent.children):
145 if node is self:
146 self.parent.changed()
147 del self.parent.children[i]
148 self.parent = None
149 return i
150
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000151 @property
152 def next_sibling(self):
153 """
154 The node immediately following the invocant in their parent's children
155 list. If the invocant does not have a next sibling, it is None
156 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000157 if self.parent is None:
158 return None
159
160 # Can't use index(); we need to test by identity
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000161 for i, child in enumerate(self.parent.children):
162 if child is self:
Martin v. Löwisef04c442008-03-19 05:04:44 +0000163 try:
164 return self.parent.children[i+1]
165 except IndexError:
166 return None
167
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000168 @property
169 def prev_sibling(self):
170 """
171 The node immediately preceding the invocant in their parent's children
172 list. If the invocant does not have a previous sibling, it is None.
173 """
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000174 if self.parent is None:
175 return None
176
177 # Can't use index(); we need to test by identity
178 for i, child in enumerate(self.parent.children):
179 if child is self:
180 if i == 0:
181 return None
182 return self.parent.children[i-1]
183
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000184 def leaves(self):
185 for child in self.children:
Andrew Svetlov7d140152012-10-06 17:11:45 +0300186 yield from child.leaves()
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000187
188 def depth(self):
189 if self.parent is None:
190 return 0
191 return 1 + self.parent.depth()
192
Martin v. Löwisef04c442008-03-19 05:04:44 +0000193 def get_suffix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000194 """
195 Return the string immediately following the invocant node. This is
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000196 effectively equivalent to node.next_sibling.prefix
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000197 """
198 next_sib = self.next_sibling
Martin v. Löwisef04c442008-03-19 05:04:44 +0000199 if next_sib is None:
200 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000201 return next_sib.prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000202
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000203 if sys.version_info < (3, 0):
204 def __str__(self):
205 return str(self).encode("ascii")
206
Martin v. Löwisef04c442008-03-19 05:04:44 +0000207class Node(Base):
208
209 """Concrete implementation for interior nodes."""
210
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000211 def __init__(self,type, children,
212 context=None,
213 prefix=None,
214 fixers_applied=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000215 """
216 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000217
218 Takes a type constant (a symbol number >= 256), a sequence of
219 child nodes, and an optional context keyword argument.
220
221 As a side effect, the parent pointers of the children are updated.
222 """
223 assert type >= 256, type
224 self.type = type
225 self.children = list(children)
226 for ch in self.children:
227 assert ch.parent is None, repr(ch)
228 ch.parent = self
229 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000230 self.prefix = prefix
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000231 if fixers_applied:
232 self.fixers_applied = fixers_applied[:]
233 else:
234 self.fixers_applied = None
Martin v. Löwisef04c442008-03-19 05:04:44 +0000235
236 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000237 """Return a canonical string representation."""
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000238 return "%s(%s, %r)" % (self.__class__.__name__,
239 type_repr(self.type),
Martin v. Löwisef04c442008-03-19 05:04:44 +0000240 self.children)
241
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000242 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000243 """
244 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000245
246 This reproduces the input source exactly.
247 """
248 return "".join(map(str, self.children))
249
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000250 if sys.version_info > (3, 0):
251 __str__ = __unicode__
252
Martin v. Löwisef04c442008-03-19 05:04:44 +0000253 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000254 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000255 return (self.type, self.children) == (other.type, other.children)
256
257 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000258 """Return a cloned (deep) copy of self."""
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000259 return Node(self.type, [ch.clone() for ch in self.children],
260 fixers_applied=self.fixers_applied)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000261
262 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000263 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000264 for child in self.children:
Andrew Svetlov7d140152012-10-06 17:11:45 +0300265 yield from child.post_order()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000266 yield self
267
268 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000269 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000270 yield self
271 for child in self.children:
Andrew Svetlov7d140152012-10-06 17:11:45 +0300272 yield from child.pre_order()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000273
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200274 @property
275 def prefix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000276 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000277 The whitespace and comments preceding this node in the input.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000278 """
279 if not self.children:
280 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000281 return self.children[0].prefix
282
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200283 @prefix.setter
284 def prefix(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000285 if self.children:
286 self.children[0].prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000287
288 def set_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000289 """
290 Equivalent to 'node.children[i] = child'. This method also sets the
291 child's parent attribute appropriately.
292 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000293 child.parent = self
294 self.children[i].parent = None
295 self.children[i] = child
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000296 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000297
298 def insert_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000299 """
300 Equivalent to 'node.children.insert(i, child)'. This method also sets
301 the child's parent attribute appropriately.
302 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000303 child.parent = self
304 self.children.insert(i, child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000305 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000306
307 def append_child(self, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000308 """
309 Equivalent to 'node.children.append(child)'. This method also sets the
310 child's parent attribute appropriately.
311 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000312 child.parent = self
313 self.children.append(child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000314 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000315
316
317class Leaf(Base):
318
319 """Concrete implementation for leaf nodes."""
320
321 # Default values for instance variables
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000322 _prefix = "" # Whitespace and comments preceding this token in the input
323 lineno = 0 # Line where this token starts in the input
324 column = 0 # Column where this token tarts in the input
Martin v. Löwisef04c442008-03-19 05:04:44 +0000325
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000326 def __init__(self, type, value,
327 context=None,
328 prefix=None,
329 fixers_applied=[]):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000330 """
331 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000332
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000333 Takes a type constant (a token number < 256), a string value, and an
334 optional context keyword argument.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000335 """
336 assert 0 <= type < 256, type
337 if context is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000338 self._prefix, (self.lineno, self.column) = context
Martin v. Löwisef04c442008-03-19 05:04:44 +0000339 self.type = type
340 self.value = value
341 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000342 self._prefix = prefix
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000343 self.fixers_applied = fixers_applied[:]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000344
345 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000346 """Return a canonical string representation."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000347 return "%s(%r, %r)" % (self.__class__.__name__,
348 self.type,
349 self.value)
350
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000351 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000352 """
353 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000354
355 This reproduces the input source exactly.
356 """
357 return self.prefix + str(self.value)
358
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000359 if sys.version_info > (3, 0):
360 __str__ = __unicode__
361
Martin v. Löwisef04c442008-03-19 05:04:44 +0000362 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000363 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000364 return (self.type, self.value) == (other.type, other.value)
365
366 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000367 """Return a cloned (deep) copy of self."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000368 return Leaf(self.type, self.value,
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000369 (self.prefix, (self.lineno, self.column)),
370 fixers_applied=self.fixers_applied)
371
372 def leaves(self):
373 yield self
Martin v. Löwisef04c442008-03-19 05:04:44 +0000374
375 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000376 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000377 yield self
378
379 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000380 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000381 yield self
382
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200383 @property
384 def prefix(self):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000385 """
386 The whitespace and comments preceding this token in the input.
387 """
388 return self._prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000389
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200390 @prefix.setter
391 def prefix(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000392 self.changed()
393 self._prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000394
Martin v. Löwisef04c442008-03-19 05:04:44 +0000395def convert(gr, raw_node):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000396 """
397 Convert raw node information to a Node or Leaf instance.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000398
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000399 This is passed to the parser driver which calls it whenever a reduction of a
400 grammar rule produces a new complete node, so that the tree is build
401 strictly bottom-up.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000402 """
403 type, value, context, children = raw_node
404 if children or type in gr.number2symbol:
405 # If there's exactly one child, return that child instead of
406 # creating a new node.
407 if len(children) == 1:
408 return children[0]
409 return Node(type, children, context=context)
410 else:
411 return Leaf(type, value, context=context)
412
413
414class BasePattern(object):
415
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000416 """
417 A pattern is a tree matching pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000418
419 It looks for a specific node type (token or symbol), and
420 optionally for a specific content.
421
422 This is an abstract base class. There are three concrete
423 subclasses:
424
425 - LeafPattern matches a single leaf node;
426 - NodePattern matches a single node (usually non-leaf);
427 - WildcardPattern matches a sequence of nodes of variable length.
428 """
429
430 # Defaults for instance variables
431 type = None # Node type (token if < 256, symbol if >= 256)
432 content = None # Optional content matching pattern
433 name = None # Optional name used to store match in results dict
434
435 def __new__(cls, *args, **kwds):
436 """Constructor that prevents BasePattern from being instantiated."""
437 assert cls is not BasePattern, "Cannot instantiate BasePattern"
438 return object.__new__(cls)
439
440 def __repr__(self):
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000441 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000442 while args and args[-1] is None:
443 del args[-1]
444 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
445
446 def optimize(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000447 """
448 A subclass can define this as a hook for optimizations.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000449
450 Returns either self or another node with the same effect.
451 """
452 return self
453
454 def match(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000455 """
456 Does this pattern exactly match a node?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000457
458 Returns True if it matches, False if not.
459
460 If results is not None, it must be a dict which will be
461 updated with the nodes matching named subpatterns.
462
463 Default implementation for non-wildcard patterns.
464 """
465 if self.type is not None and node.type != self.type:
466 return False
467 if self.content is not None:
468 r = None
469 if results is not None:
470 r = {}
471 if not self._submatch(node, r):
472 return False
473 if r:
474 results.update(r)
475 if results is not None and self.name:
476 results[self.name] = node
477 return True
478
479 def match_seq(self, nodes, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000480 """
481 Does this pattern exactly match a sequence of nodes?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000482
483 Default implementation for non-wildcard patterns.
484 """
485 if len(nodes) != 1:
486 return False
487 return self.match(nodes[0], results)
488
489 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000490 """
491 Generator yielding all matches for this pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000492
493 Default implementation for non-wildcard patterns.
494 """
495 r = {}
496 if nodes and self.match(nodes[0], r):
497 yield 1, r
498
499
500class LeafPattern(BasePattern):
501
502 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000503 """
504 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000505
506 The type, if given must be a token type (< 256). If not given,
507 this matches any *leaf* node; the content may still be required.
508
509 The content, if given, must be a string.
510
511 If a name is given, the matching node is stored in the results
512 dict under that key.
513 """
514 if type is not None:
515 assert 0 <= type < 256, type
516 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000517 assert isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000518 self.type = type
519 self.content = content
520 self.name = name
521
522 def match(self, node, results=None):
523 """Override match() to insist on a leaf node."""
524 if not isinstance(node, Leaf):
525 return False
526 return BasePattern.match(self, node, results)
527
528 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000529 """
530 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000531
532 This assumes the node type matches and self.content is not None.
533
534 Returns True if it matches, False if not.
535
536 If results is not None, it must be a dict which will be
537 updated with the nodes matching named subpatterns.
538
539 When returning False, the results dict may still be updated.
540 """
541 return self.content == node.value
542
543
544class NodePattern(BasePattern):
545
546 wildcards = False
547
548 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000549 """
550 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000551
552 The type, if given, must be a symbol type (>= 256). If the
553 type is None this matches *any* single node (leaf or not),
554 except if content is not None, in which it only matches
555 non-leaf nodes that also match the content pattern.
556
557 The content, if not None, must be a sequence of Patterns that
558 must match the node's children exactly. If the content is
559 given, the type must not be None.
560
561 If a name is given, the matching node is stored in the results
562 dict under that key.
563 """
564 if type is not None:
565 assert type >= 256, type
566 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000567 assert not isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000568 content = list(content)
569 for i, item in enumerate(content):
570 assert isinstance(item, BasePattern), (i, item)
571 if isinstance(item, WildcardPattern):
572 self.wildcards = True
573 self.type = type
574 self.content = content
575 self.name = name
576
577 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000578 """
579 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000580
581 This assumes the node type matches and self.content is not None.
582
583 Returns True if it matches, False if not.
584
585 If results is not None, it must be a dict which will be
586 updated with the nodes matching named subpatterns.
587
588 When returning False, the results dict may still be updated.
589 """
590 if self.wildcards:
591 for c, r in generate_matches(self.content, node.children):
592 if c == len(node.children):
593 if results is not None:
594 results.update(r)
595 return True
596 return False
597 if len(self.content) != len(node.children):
598 return False
599 for subpattern, child in zip(self.content, node.children):
600 if not subpattern.match(child, results):
601 return False
602 return True
603
604
605class WildcardPattern(BasePattern):
606
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000607 """
608 A wildcard pattern can match zero or more nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000609
610 This has all the flexibility needed to implement patterns like:
611
612 .* .+ .? .{m,n}
613 (a b c | d e | f)
614 (...)* (...)+ (...)? (...){m,n}
615
616 except it always uses non-greedy matching.
617 """
618
619 def __init__(self, content=None, min=0, max=HUGE, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000620 """
621 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000622
623 Args:
624 content: optional sequence of subsequences of patterns;
625 if absent, matches one node;
626 if present, each subsequence is an alternative [*]
Ezio Melotti13925002011-03-16 11:05:33 +0200627 min: optional minimum number of times to match, default 0
628 max: optional maximum number of times to match, default HUGE
Martin v. Löwisef04c442008-03-19 05:04:44 +0000629 name: optional name assigned to this match
630
631 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
632 equivalent to (a b c | d e | f g h); if content is None,
633 this is equivalent to '.' in regular expression terms.
634 The min and max parameters work as follows:
635 min=0, max=maxint: .*
636 min=1, max=maxint: .+
637 min=0, max=1: .?
638 min=1, max=1: .
639 If content is not None, replace the dot with the parenthesized
640 list of alternatives, e.g. (a b c | d e | f g h)*
641 """
642 assert 0 <= min <= max <= HUGE, (min, max)
643 if content is not None:
644 content = tuple(map(tuple, content)) # Protect against alterations
645 # Check sanity of alternatives
646 assert len(content), repr(content) # Can't have zero alternatives
647 for alt in content:
648 assert len(alt), repr(alt) # Can have empty alternatives
649 self.content = content
650 self.min = min
651 self.max = max
652 self.name = name
653
654 def optimize(self):
655 """Optimize certain stacked wildcard patterns."""
656 subpattern = None
657 if (self.content is not None and
658 len(self.content) == 1 and len(self.content[0]) == 1):
659 subpattern = self.content[0][0]
660 if self.min == 1 and self.max == 1:
661 if self.content is None:
662 return NodePattern(name=self.name)
663 if subpattern is not None and self.name == subpattern.name:
664 return subpattern.optimize()
665 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
666 subpattern.min <= 1 and self.name == subpattern.name):
667 return WildcardPattern(subpattern.content,
668 self.min*subpattern.min,
669 self.max*subpattern.max,
670 subpattern.name)
671 return self
672
673 def match(self, node, results=None):
674 """Does this pattern exactly match a node?"""
675 return self.match_seq([node], results)
676
677 def match_seq(self, nodes, results=None):
678 """Does this pattern exactly match a sequence of nodes?"""
679 for c, r in self.generate_matches(nodes):
680 if c == len(nodes):
681 if results is not None:
682 results.update(r)
683 if self.name:
684 results[self.name] = list(nodes)
685 return True
686 return False
687
688 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000689 """
690 Generator yielding matches for a sequence of nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000691
692 Args:
693 nodes: sequence of nodes
694
695 Yields:
696 (count, results) tuples where:
697 count: the match comprises nodes[:count];
698 results: dict containing named submatches.
699 """
700 if self.content is None:
701 # Shortcut for special case (see __init__.__doc__)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000702 for count in range(self.min, 1 + min(len(nodes), self.max)):
Martin v. Löwisef04c442008-03-19 05:04:44 +0000703 r = {}
704 if self.name:
705 r[self.name] = nodes[:count]
706 yield count, r
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000707 elif self.name == "bare_name":
708 yield self._bare_name_matches(nodes)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000709 else:
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000710 # The reason for this is that hitting the recursion limit usually
711 # results in some ugly messages about how RuntimeErrors are being
Benjamin Petersond86e4c82011-03-06 17:14:30 -0600712 # ignored. We only have to do this on CPython, though, because other
713 # implementations don't have this nasty bug in the first place.
714 if hasattr(sys, "getrefcount"):
715 save_stderr = sys.stderr
716 sys.stderr = StringIO()
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000717 try:
718 for count, r in self._recursive_matches(nodes, 0):
719 if self.name:
720 r[self.name] = nodes[:count]
721 yield count, r
722 except RuntimeError:
723 # We fall back to the iterative pattern matching scheme if the recursive
724 # scheme hits the recursion limit.
725 for count, r in self._iterative_matches(nodes):
726 if self.name:
727 r[self.name] = nodes[:count]
728 yield count, r
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000729 finally:
Benjamin Petersond86e4c82011-03-06 17:14:30 -0600730 if hasattr(sys, "getrefcount"):
731 sys.stderr = save_stderr
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000732
733 def _iterative_matches(self, nodes):
734 """Helper to iteratively yield the matches."""
735 nodelen = len(nodes)
736 if 0 >= self.min:
737 yield 0, {}
738
739 results = []
740 # generate matches that use just one alt from self.content
741 for alt in self.content:
742 for c, r in generate_matches(alt, nodes):
743 yield c, r
744 results.append((c, r))
745
746 # for each match, iterate down the nodes
747 while results:
748 new_results = []
749 for c0, r0 in results:
750 # stop if the entire set of nodes has been matched
751 if c0 < nodelen and c0 <= self.max:
752 for alt in self.content:
753 for c1, r1 in generate_matches(alt, nodes[c0:]):
754 if c1 > 0:
755 r = {}
756 r.update(r0)
757 r.update(r1)
758 yield c0 + c1, r
759 new_results.append((c0 + c1, r))
760 results = new_results
Martin v. Löwisef04c442008-03-19 05:04:44 +0000761
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000762 def _bare_name_matches(self, nodes):
763 """Special optimized matcher for bare_name."""
764 count = 0
765 r = {}
766 done = False
767 max = len(nodes)
768 while not done and count < max:
769 done = True
770 for leaf in self.content:
771 if leaf[0].match(nodes[count], r):
772 count += 1
773 done = False
774 break
775 r[self.name] = nodes[:count]
776 return count, r
777
Martin v. Löwisef04c442008-03-19 05:04:44 +0000778 def _recursive_matches(self, nodes, count):
779 """Helper to recursively yield the matches."""
780 assert self.content is not None
781 if count >= self.min:
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000782 yield 0, {}
Martin v. Löwisef04c442008-03-19 05:04:44 +0000783 if count < self.max:
784 for alt in self.content:
785 for c0, r0 in generate_matches(alt, nodes):
786 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
787 r = {}
788 r.update(r0)
789 r.update(r1)
790 yield c0 + c1, r
791
792
793class NegatedPattern(BasePattern):
794
795 def __init__(self, content=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000796 """
797 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000798
799 The argument is either a pattern or None. If it is None, this
800 only matches an empty sequence (effectively '$' in regex
801 lingo). If it is not None, this matches whenever the argument
802 pattern doesn't have any matches.
803 """
804 if content is not None:
805 assert isinstance(content, BasePattern), repr(content)
806 self.content = content
807
808 def match(self, node):
809 # We never match a node in its entirety
810 return False
811
812 def match_seq(self, nodes):
813 # We only match an empty sequence of nodes in its entirety
814 return len(nodes) == 0
815
816 def generate_matches(self, nodes):
817 if self.content is None:
818 # Return a match if there is an empty sequence
819 if len(nodes) == 0:
820 yield 0, {}
821 else:
822 # Return a match if the argument pattern has no matches
823 for c, r in self.content.generate_matches(nodes):
824 return
825 yield 0, {}
826
827
828def generate_matches(patterns, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000829 """
830 Generator yielding matches for a sequence of patterns and nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000831
832 Args:
833 patterns: a sequence of patterns
834 nodes: a sequence of nodes
835
836 Yields:
837 (count, results) tuples where:
838 count: the entire sequence of patterns matches nodes[:count];
839 results: dict containing named submatches.
840 """
841 if not patterns:
842 yield 0, {}
843 else:
844 p, rest = patterns[0], patterns[1:]
845 for c0, r0 in p.generate_matches(nodes):
846 if not rest:
847 yield c0, r0
848 else:
849 for c1, r1 in generate_matches(rest, nodes[c0:]):
850 r = {}
851 r.update(r0)
852 r.update(r1)
853 yield c0 + c1, r