blob: fa4942f39214407aa3d7adb0d0837bccdf00d78e [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
Benjamin Peterson608d8bc2009-05-05 23:23:31 +00004"""
5Python parse tree definitions.
Martin v. Löwisef04c442008-03-19 05:04:44 +00006
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
Benjamin Peterson5cff9312008-11-28 23:01:28 +000015import sys
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +000016import warnings
Benjamin Peterson5cff9312008-11-28 23:01:28 +000017from io import StringIO
18
Martin v. Löwisef04c442008-03-19 05:04:44 +000019HUGE = 0x7FFFFFFF # maximum repeat count, default max
20
Martin v. Löwis3faa84f2008-03-22 00:07:09 +000021_type_reprs = {}
22def type_repr(type_num):
23 global _type_reprs
24 if not _type_reprs:
25 from .pygram import python_symbols
26 # printing tokens is possible but not as useful
27 # from .pgen2 import token // token.__dict__.items():
28 for name, val in python_symbols.__dict__.items():
29 if type(val) == int: _type_reprs[val] = name
30 return _type_reprs.setdefault(type_num, type_num)
31
Martin v. Löwisef04c442008-03-19 05:04:44 +000032class Base(object):
33
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000034 """
35 Abstract base class for Node and Leaf.
Martin v. Löwisef04c442008-03-19 05:04:44 +000036
37 This provides some default functionality and boilerplate using the
38 template pattern.
39
40 A node may be a subnode of at most one parent.
41 """
42
43 # Default values for instance variables
44 type = None # int: token number (< 256) or symbol number (>= 256)
45 parent = None # Parent node pointer, or None
46 children = () # Tuple of subnodes
47 was_changed = False
Benjamin Petersonb0871ca2010-10-14 23:03:32 +000048 was_checked = False
Martin v. Löwisef04c442008-03-19 05:04:44 +000049
50 def __new__(cls, *args, **kwds):
51 """Constructor that prevents Base from being instantiated."""
52 assert cls is not Base, "Cannot instantiate Base"
53 return object.__new__(cls)
54
55 def __eq__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000056 """
57 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000058
59 This calls the method _eq().
60 """
61 if self.__class__ is not other.__class__:
62 return NotImplemented
63 return self._eq(other)
64
Benjamin Petersone80b51f2009-11-02 18:33:36 +000065 __hash__ = None # For Py3 compatibility.
66
Martin v. Löwisef04c442008-03-19 05:04:44 +000067 def __ne__(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000068 """
69 Compare two nodes for inequality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000070
71 This calls the method _eq().
72 """
73 if self.__class__ is not other.__class__:
74 return NotImplemented
75 return not self._eq(other)
76
77 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000078 """
79 Compare two nodes for equality.
Martin v. Löwisef04c442008-03-19 05:04:44 +000080
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000081 This is called by __eq__ and __ne__. It is only called if the two nodes
82 have the same type. This must be implemented by the concrete subclass.
83 Nodes should be considered equal if they have the same structure,
84 ignoring the prefix string and other context information.
Martin v. Löwisef04c442008-03-19 05:04:44 +000085 """
86 raise NotImplementedError
87
88 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000089 """
90 Return a cloned (deep) copy of self.
Martin v. Löwisef04c442008-03-19 05:04:44 +000091
92 This must be implemented by the concrete subclass.
93 """
94 raise NotImplementedError
95
96 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000097 """
98 Return a post-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +000099
100 This must be implemented by the concrete subclass.
101 """
102 raise NotImplementedError
103
104 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000105 """
106 Return a pre-order iterator for the tree.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000107
108 This must be implemented by the concrete subclass.
109 """
110 raise NotImplementedError
111
112 def set_prefix(self, prefix):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000113 """
114 Set the prefix for the node (see Leaf class).
Martin v. Löwisef04c442008-03-19 05:04:44 +0000115
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000116 DEPRECATED; use the prefix property directly.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000117 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000118 warnings.warn("set_prefix() is deprecated; use the prefix property",
119 DeprecationWarning, stacklevel=2)
120 self.prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000121
122 def get_prefix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000123 """
124 Return the prefix for the node (see Leaf class).
Martin v. Löwisef04c442008-03-19 05:04:44 +0000125
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000126 DEPRECATED; use the prefix property directly.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000127 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000128 warnings.warn("get_prefix() is deprecated; use the prefix property",
129 DeprecationWarning, stacklevel=2)
130 return self.prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000131
132 def replace(self, new):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000133 """Replace this node with a new one in the parent."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000134 assert self.parent is not None, str(self)
135 assert new is not None
136 if not isinstance(new, list):
137 new = [new]
138 l_children = []
139 found = False
140 for ch in self.parent.children:
141 if ch is self:
142 assert not found, (self.parent.children, self, new)
143 if new is not None:
144 l_children.extend(new)
145 found = True
146 else:
147 l_children.append(ch)
148 assert found, (self.children, self, new)
149 self.parent.changed()
150 self.parent.children = l_children
151 for x in new:
152 x.parent = self.parent
153 self.parent = None
154
155 def get_lineno(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000156 """Return the line number which generated the invocant node."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000157 node = self
158 while not isinstance(node, Leaf):
159 if not node.children:
160 return
161 node = node.children[0]
162 return node.lineno
163
164 def changed(self):
165 if self.parent:
166 self.parent.changed()
167 self.was_changed = True
168
169 def remove(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000170 """
171 Remove the node from the tree. Returns the position of the node in its
172 parent's children before it was removed.
173 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000174 if self.parent:
175 for i, node in enumerate(self.parent.children):
176 if node is self:
177 self.parent.changed()
178 del self.parent.children[i]
179 self.parent = None
180 return i
181
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000182 @property
183 def next_sibling(self):
184 """
185 The node immediately following the invocant in their parent's children
186 list. If the invocant does not have a next sibling, it is None
187 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000188 if self.parent is None:
189 return None
190
191 # Can't use index(); we need to test by identity
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000192 for i, child in enumerate(self.parent.children):
193 if child is self:
Martin v. Löwisef04c442008-03-19 05:04:44 +0000194 try:
195 return self.parent.children[i+1]
196 except IndexError:
197 return None
198
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000199 @property
200 def prev_sibling(self):
201 """
202 The node immediately preceding the invocant in their parent's children
203 list. If the invocant does not have a previous sibling, it is None.
204 """
Martin v. Löwis3de92bf2008-04-10 02:50:50 +0000205 if self.parent is None:
206 return None
207
208 # Can't use index(); we need to test by identity
209 for i, child in enumerate(self.parent.children):
210 if child is self:
211 if i == 0:
212 return None
213 return self.parent.children[i-1]
214
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000215 def leaves(self):
216 for child in self.children:
217 for x in child.leaves():
218 yield x
219
220 def depth(self):
221 if self.parent is None:
222 return 0
223 return 1 + self.parent.depth()
224
Martin v. Löwisef04c442008-03-19 05:04:44 +0000225 def get_suffix(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000226 """
227 Return the string immediately following the invocant node. This is
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000228 effectively equivalent to node.next_sibling.prefix
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000229 """
230 next_sib = self.next_sibling
Martin v. Löwisef04c442008-03-19 05:04:44 +0000231 if next_sib is None:
232 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000233 return next_sib.prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000234
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000235 if sys.version_info < (3, 0):
236 def __str__(self):
237 return str(self).encode("ascii")
238
Martin v. Löwisef04c442008-03-19 05:04:44 +0000239class Node(Base):
240
241 """Concrete implementation for interior nodes."""
242
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000243 def __init__(self,type, children,
244 context=None,
245 prefix=None,
246 fixers_applied=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000247 """
248 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000249
250 Takes a type constant (a symbol number >= 256), a sequence of
251 child nodes, and an optional context keyword argument.
252
253 As a side effect, the parent pointers of the children are updated.
254 """
255 assert type >= 256, type
256 self.type = type
257 self.children = list(children)
258 for ch in self.children:
259 assert ch.parent is None, repr(ch)
260 ch.parent = self
261 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000262 self.prefix = prefix
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000263 if fixers_applied:
264 self.fixers_applied = fixers_applied[:]
265 else:
266 self.fixers_applied = None
Martin v. Löwisef04c442008-03-19 05:04:44 +0000267
268 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000269 """Return a canonical string representation."""
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000270 return "%s(%s, %r)" % (self.__class__.__name__,
271 type_repr(self.type),
Martin v. Löwisef04c442008-03-19 05:04:44 +0000272 self.children)
273
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000274 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000275 """
276 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000277
278 This reproduces the input source exactly.
279 """
280 return "".join(map(str, self.children))
281
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000282 if sys.version_info > (3, 0):
283 __str__ = __unicode__
284
Martin v. Löwisef04c442008-03-19 05:04:44 +0000285 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000286 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000287 return (self.type, self.children) == (other.type, other.children)
288
289 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000290 """Return a cloned (deep) copy of self."""
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000291 return Node(self.type, [ch.clone() for ch in self.children],
292 fixers_applied=self.fixers_applied)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000293
294 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000295 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000296 for child in self.children:
297 for node in child.post_order():
298 yield node
299 yield self
300
301 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000302 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000303 yield self
304 for child in self.children:
Benjamin Peterson68c80ed2010-08-08 19:23:25 +0000305 for node in child.pre_order():
Martin v. Löwisef04c442008-03-19 05:04:44 +0000306 yield node
307
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000308 def _prefix_getter(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000309 """
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000310 The whitespace and comments preceding this node in the input.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000311 """
312 if not self.children:
313 return ""
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000314 return self.children[0].prefix
315
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000316 def _prefix_setter(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000317 if self.children:
318 self.children[0].prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000319
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000320 prefix = property(_prefix_getter, _prefix_setter)
321
Martin v. Löwisef04c442008-03-19 05:04:44 +0000322 def set_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000323 """
324 Equivalent to 'node.children[i] = child'. This method also sets the
325 child's parent attribute appropriately.
326 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000327 child.parent = self
328 self.children[i].parent = None
329 self.children[i] = child
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000330 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000331
332 def insert_child(self, i, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000333 """
334 Equivalent to 'node.children.insert(i, child)'. This method also sets
335 the child's parent attribute appropriately.
336 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000337 child.parent = self
338 self.children.insert(i, child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000339 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000340
341 def append_child(self, child):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000342 """
343 Equivalent to 'node.children.append(child)'. This method also sets the
344 child's parent attribute appropriately.
345 """
Martin v. Löwisef04c442008-03-19 05:04:44 +0000346 child.parent = self
347 self.children.append(child)
Benjamin Peterson0b24b3d2008-12-16 03:57:54 +0000348 self.changed()
Martin v. Löwisef04c442008-03-19 05:04:44 +0000349
350
351class Leaf(Base):
352
353 """Concrete implementation for leaf nodes."""
354
355 # Default values for instance variables
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000356 _prefix = "" # Whitespace and comments preceding this token in the input
357 lineno = 0 # Line where this token starts in the input
358 column = 0 # Column where this token tarts in the input
Martin v. Löwisef04c442008-03-19 05:04:44 +0000359
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000360 def __init__(self, type, value,
361 context=None,
362 prefix=None,
363 fixers_applied=[]):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000364 """
365 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000366
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000367 Takes a type constant (a token number < 256), a string value, and an
368 optional context keyword argument.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000369 """
370 assert 0 <= type < 256, type
371 if context is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000372 self._prefix, (self.lineno, self.column) = context
Martin v. Löwisef04c442008-03-19 05:04:44 +0000373 self.type = type
374 self.value = value
375 if prefix is not None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000376 self._prefix = prefix
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000377 self.fixers_applied = fixers_applied[:]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000378
379 def __repr__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000380 """Return a canonical string representation."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000381 return "%s(%r, %r)" % (self.__class__.__name__,
382 self.type,
383 self.value)
384
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000385 def __unicode__(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000386 """
387 Return a pretty string representation.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000388
389 This reproduces the input source exactly.
390 """
391 return self.prefix + str(self.value)
392
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000393 if sys.version_info > (3, 0):
394 __str__ = __unicode__
395
Martin v. Löwisef04c442008-03-19 05:04:44 +0000396 def _eq(self, other):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000397 """Compare two nodes for equality."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000398 return (self.type, self.value) == (other.type, other.value)
399
400 def clone(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000401 """Return a cloned (deep) copy of self."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000402 return Leaf(self.type, self.value,
Benjamin Petersonb0871ca2010-10-14 23:03:32 +0000403 (self.prefix, (self.lineno, self.column)),
404 fixers_applied=self.fixers_applied)
405
406 def leaves(self):
407 yield self
Martin v. Löwisef04c442008-03-19 05:04:44 +0000408
409 def post_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000410 """Return a post-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000411 yield self
412
413 def pre_order(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000414 """Return a pre-order iterator for the tree."""
Martin v. Löwisef04c442008-03-19 05:04:44 +0000415 yield self
416
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000417 def _prefix_getter(self):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000418 """
419 The whitespace and comments preceding this token in the input.
420 """
421 return self._prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000422
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000423 def _prefix_setter(self, prefix):
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000424 self.changed()
425 self._prefix = prefix
Martin v. Löwisef04c442008-03-19 05:04:44 +0000426
Benjamin Petersonc9e833f2010-05-08 15:46:00 +0000427 prefix = property(_prefix_getter, _prefix_setter)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000428
429def convert(gr, raw_node):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000430 """
431 Convert raw node information to a Node or Leaf instance.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000432
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000433 This is passed to the parser driver which calls it whenever a reduction of a
434 grammar rule produces a new complete node, so that the tree is build
435 strictly bottom-up.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000436 """
437 type, value, context, children = raw_node
438 if children or type in gr.number2symbol:
439 # If there's exactly one child, return that child instead of
440 # creating a new node.
441 if len(children) == 1:
442 return children[0]
443 return Node(type, children, context=context)
444 else:
445 return Leaf(type, value, context=context)
446
447
448class BasePattern(object):
449
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000450 """
451 A pattern is a tree matching pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000452
453 It looks for a specific node type (token or symbol), and
454 optionally for a specific content.
455
456 This is an abstract base class. There are three concrete
457 subclasses:
458
459 - LeafPattern matches a single leaf node;
460 - NodePattern matches a single node (usually non-leaf);
461 - WildcardPattern matches a sequence of nodes of variable length.
462 """
463
464 # Defaults for instance variables
465 type = None # Node type (token if < 256, symbol if >= 256)
466 content = None # Optional content matching pattern
467 name = None # Optional name used to store match in results dict
468
469 def __new__(cls, *args, **kwds):
470 """Constructor that prevents BasePattern from being instantiated."""
471 assert cls is not BasePattern, "Cannot instantiate BasePattern"
472 return object.__new__(cls)
473
474 def __repr__(self):
Martin v. Löwis3faa84f2008-03-22 00:07:09 +0000475 args = [type_repr(self.type), self.content, self.name]
Martin v. Löwisef04c442008-03-19 05:04:44 +0000476 while args and args[-1] is None:
477 del args[-1]
478 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
479
480 def optimize(self):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000481 """
482 A subclass can define this as a hook for optimizations.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000483
484 Returns either self or another node with the same effect.
485 """
486 return self
487
488 def match(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000489 """
490 Does this pattern exactly match a node?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000491
492 Returns True if it matches, False if not.
493
494 If results is not None, it must be a dict which will be
495 updated with the nodes matching named subpatterns.
496
497 Default implementation for non-wildcard patterns.
498 """
499 if self.type is not None and node.type != self.type:
500 return False
501 if self.content is not None:
502 r = None
503 if results is not None:
504 r = {}
505 if not self._submatch(node, r):
506 return False
507 if r:
508 results.update(r)
509 if results is not None and self.name:
510 results[self.name] = node
511 return True
512
513 def match_seq(self, nodes, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000514 """
515 Does this pattern exactly match a sequence of nodes?
Martin v. Löwisef04c442008-03-19 05:04:44 +0000516
517 Default implementation for non-wildcard patterns.
518 """
519 if len(nodes) != 1:
520 return False
521 return self.match(nodes[0], results)
522
523 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000524 """
525 Generator yielding all matches for this pattern.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000526
527 Default implementation for non-wildcard patterns.
528 """
529 r = {}
530 if nodes and self.match(nodes[0], r):
531 yield 1, r
532
533
534class LeafPattern(BasePattern):
535
536 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000537 """
538 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000539
540 The type, if given must be a token type (< 256). If not given,
541 this matches any *leaf* node; the content may still be required.
542
543 The content, if given, must be a string.
544
545 If a name is given, the matching node is stored in the results
546 dict under that key.
547 """
548 if type is not None:
549 assert 0 <= type < 256, type
550 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000551 assert isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000552 self.type = type
553 self.content = content
554 self.name = name
555
556 def match(self, node, results=None):
557 """Override match() to insist on a leaf node."""
558 if not isinstance(node, Leaf):
559 return False
560 return BasePattern.match(self, node, results)
561
562 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000563 """
564 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000565
566 This assumes the node type matches and self.content is not None.
567
568 Returns True if it matches, False if not.
569
570 If results is not None, it must be a dict which will be
571 updated with the nodes matching named subpatterns.
572
573 When returning False, the results dict may still be updated.
574 """
575 return self.content == node.value
576
577
578class NodePattern(BasePattern):
579
580 wildcards = False
581
582 def __init__(self, type=None, content=None, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000583 """
584 Initializer. Takes optional type, content, and name.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000585
586 The type, if given, must be a symbol type (>= 256). If the
587 type is None this matches *any* single node (leaf or not),
588 except if content is not None, in which it only matches
589 non-leaf nodes that also match the content pattern.
590
591 The content, if not None, must be a sequence of Patterns that
592 must match the node's children exactly. If the content is
593 given, the type must not be None.
594
595 If a name is given, the matching node is stored in the results
596 dict under that key.
597 """
598 if type is not None:
599 assert type >= 256, type
600 if content is not None:
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000601 assert not isinstance(content, str), repr(content)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000602 content = list(content)
603 for i, item in enumerate(content):
604 assert isinstance(item, BasePattern), (i, item)
605 if isinstance(item, WildcardPattern):
606 self.wildcards = True
607 self.type = type
608 self.content = content
609 self.name = name
610
611 def _submatch(self, node, results=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000612 """
613 Match the pattern's content to the node's children.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000614
615 This assumes the node type matches and self.content is not None.
616
617 Returns True if it matches, False if not.
618
619 If results is not None, it must be a dict which will be
620 updated with the nodes matching named subpatterns.
621
622 When returning False, the results dict may still be updated.
623 """
624 if self.wildcards:
625 for c, r in generate_matches(self.content, node.children):
626 if c == len(node.children):
627 if results is not None:
628 results.update(r)
629 return True
630 return False
631 if len(self.content) != len(node.children):
632 return False
633 for subpattern, child in zip(self.content, node.children):
634 if not subpattern.match(child, results):
635 return False
636 return True
637
638
639class WildcardPattern(BasePattern):
640
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000641 """
642 A wildcard pattern can match zero or more nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000643
644 This has all the flexibility needed to implement patterns like:
645
646 .* .+ .? .{m,n}
647 (a b c | d e | f)
648 (...)* (...)+ (...)? (...){m,n}
649
650 except it always uses non-greedy matching.
651 """
652
653 def __init__(self, content=None, min=0, max=HUGE, name=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000654 """
655 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000656
657 Args:
658 content: optional sequence of subsequences of patterns;
659 if absent, matches one node;
660 if present, each subsequence is an alternative [*]
Ezio Melotti13925002011-03-16 11:05:33 +0200661 min: optional minimum number of times to match, default 0
662 max: optional maximum number of times to match, default HUGE
Martin v. Löwisef04c442008-03-19 05:04:44 +0000663 name: optional name assigned to this match
664
665 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
666 equivalent to (a b c | d e | f g h); if content is None,
667 this is equivalent to '.' in regular expression terms.
668 The min and max parameters work as follows:
669 min=0, max=maxint: .*
670 min=1, max=maxint: .+
671 min=0, max=1: .?
672 min=1, max=1: .
673 If content is not None, replace the dot with the parenthesized
674 list of alternatives, e.g. (a b c | d e | f g h)*
675 """
676 assert 0 <= min <= max <= HUGE, (min, max)
677 if content is not None:
678 content = tuple(map(tuple, content)) # Protect against alterations
679 # Check sanity of alternatives
680 assert len(content), repr(content) # Can't have zero alternatives
681 for alt in content:
682 assert len(alt), repr(alt) # Can have empty alternatives
683 self.content = content
684 self.min = min
685 self.max = max
686 self.name = name
687
688 def optimize(self):
689 """Optimize certain stacked wildcard patterns."""
690 subpattern = None
691 if (self.content is not None and
692 len(self.content) == 1 and len(self.content[0]) == 1):
693 subpattern = self.content[0][0]
694 if self.min == 1 and self.max == 1:
695 if self.content is None:
696 return NodePattern(name=self.name)
697 if subpattern is not None and self.name == subpattern.name:
698 return subpattern.optimize()
699 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
700 subpattern.min <= 1 and self.name == subpattern.name):
701 return WildcardPattern(subpattern.content,
702 self.min*subpattern.min,
703 self.max*subpattern.max,
704 subpattern.name)
705 return self
706
707 def match(self, node, results=None):
708 """Does this pattern exactly match a node?"""
709 return self.match_seq([node], results)
710
711 def match_seq(self, nodes, results=None):
712 """Does this pattern exactly match a sequence of nodes?"""
713 for c, r in self.generate_matches(nodes):
714 if c == len(nodes):
715 if results is not None:
716 results.update(r)
717 if self.name:
718 results[self.name] = list(nodes)
719 return True
720 return False
721
722 def generate_matches(self, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000723 """
724 Generator yielding matches for a sequence of nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000725
726 Args:
727 nodes: sequence of nodes
728
729 Yields:
730 (count, results) tuples where:
731 count: the match comprises nodes[:count];
732 results: dict containing named submatches.
733 """
734 if self.content is None:
735 # Shortcut for special case (see __init__.__doc__)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000736 for count in range(self.min, 1 + min(len(nodes), self.max)):
Martin v. Löwisef04c442008-03-19 05:04:44 +0000737 r = {}
738 if self.name:
739 r[self.name] = nodes[:count]
740 yield count, r
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000741 elif self.name == "bare_name":
742 yield self._bare_name_matches(nodes)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000743 else:
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000744 # The reason for this is that hitting the recursion limit usually
745 # results in some ugly messages about how RuntimeErrors are being
Benjamin Petersond86e4c82011-03-06 17:14:30 -0600746 # ignored. We only have to do this on CPython, though, because other
747 # implementations don't have this nasty bug in the first place.
748 if hasattr(sys, "getrefcount"):
749 save_stderr = sys.stderr
750 sys.stderr = StringIO()
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000751 try:
752 for count, r in self._recursive_matches(nodes, 0):
753 if self.name:
754 r[self.name] = nodes[:count]
755 yield count, r
756 except RuntimeError:
757 # We fall back to the iterative pattern matching scheme if the recursive
758 # scheme hits the recursion limit.
759 for count, r in self._iterative_matches(nodes):
760 if self.name:
761 r[self.name] = nodes[:count]
762 yield count, r
Benjamin Peterson5cff9312008-11-28 23:01:28 +0000763 finally:
Benjamin Petersond86e4c82011-03-06 17:14:30 -0600764 if hasattr(sys, "getrefcount"):
765 sys.stderr = save_stderr
Benjamin Peterson7d8d9a52008-10-04 21:04:36 +0000766
767 def _iterative_matches(self, nodes):
768 """Helper to iteratively yield the matches."""
769 nodelen = len(nodes)
770 if 0 >= self.min:
771 yield 0, {}
772
773 results = []
774 # generate matches that use just one alt from self.content
775 for alt in self.content:
776 for c, r in generate_matches(alt, nodes):
777 yield c, r
778 results.append((c, r))
779
780 # for each match, iterate down the nodes
781 while results:
782 new_results = []
783 for c0, r0 in results:
784 # stop if the entire set of nodes has been matched
785 if c0 < nodelen and c0 <= self.max:
786 for alt in self.content:
787 for c1, r1 in generate_matches(alt, nodes[c0:]):
788 if c1 > 0:
789 r = {}
790 r.update(r0)
791 r.update(r1)
792 yield c0 + c1, r
793 new_results.append((c0 + c1, r))
794 results = new_results
Martin v. Löwisef04c442008-03-19 05:04:44 +0000795
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000796 def _bare_name_matches(self, nodes):
797 """Special optimized matcher for bare_name."""
798 count = 0
799 r = {}
800 done = False
801 max = len(nodes)
802 while not done and count < max:
803 done = True
804 for leaf in self.content:
805 if leaf[0].match(nodes[count], r):
806 count += 1
807 done = False
808 break
809 r[self.name] = nodes[:count]
810 return count, r
811
Martin v. Löwisef04c442008-03-19 05:04:44 +0000812 def _recursive_matches(self, nodes, count):
813 """Helper to recursively yield the matches."""
814 assert self.content is not None
815 if count >= self.min:
Benjamin Peterson4cdf5722008-07-17 02:21:56 +0000816 yield 0, {}
Martin v. Löwisef04c442008-03-19 05:04:44 +0000817 if count < self.max:
818 for alt in self.content:
819 for c0, r0 in generate_matches(alt, nodes):
820 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
821 r = {}
822 r.update(r0)
823 r.update(r1)
824 yield c0 + c1, r
825
826
827class NegatedPattern(BasePattern):
828
829 def __init__(self, content=None):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000830 """
831 Initializer.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000832
833 The argument is either a pattern or None. If it is None, this
834 only matches an empty sequence (effectively '$' in regex
835 lingo). If it is not None, this matches whenever the argument
836 pattern doesn't have any matches.
837 """
838 if content is not None:
839 assert isinstance(content, BasePattern), repr(content)
840 self.content = content
841
842 def match(self, node):
843 # We never match a node in its entirety
844 return False
845
846 def match_seq(self, nodes):
847 # We only match an empty sequence of nodes in its entirety
848 return len(nodes) == 0
849
850 def generate_matches(self, nodes):
851 if self.content is None:
852 # Return a match if there is an empty sequence
853 if len(nodes) == 0:
854 yield 0, {}
855 else:
856 # Return a match if the argument pattern has no matches
857 for c, r in self.content.generate_matches(nodes):
858 return
859 yield 0, {}
860
861
862def generate_matches(patterns, nodes):
Benjamin Peterson608d8bc2009-05-05 23:23:31 +0000863 """
864 Generator yielding matches for a sequence of patterns and nodes.
Martin v. Löwisef04c442008-03-19 05:04:44 +0000865
866 Args:
867 patterns: a sequence of patterns
868 nodes: a sequence of nodes
869
870 Yields:
871 (count, results) tuples where:
872 count: the entire sequence of patterns matches nodes[:count];
873 results: dict containing named submatches.
874 """
875 if not patterns:
876 yield 0, {}
877 else:
878 p, rest = patterns[0], patterns[1:]
879 for c0, r0 in p.generate_matches(nodes):
880 if not rest:
881 yield c0, r0
882 else:
883 for c1, r1 in generate_matches(rest, nodes[c0:]):
884 r = {}
885 r.update(r0)
886 r.update(r1)
887 yield c0 + c1, r