| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 1 | # Copyright 2006 Google, Inc. All Rights Reserved. | 
 | 2 | # Licensed to PSF under a Contributor Agreement. | 
 | 3 |  | 
 | 4 | """Python parse tree definitions. | 
 | 5 |  | 
 | 6 | This is a very concrete parse tree; we need to keep every token and | 
 | 7 | even the comments and whitespace between tokens. | 
 | 8 |  | 
 | 9 | There's also a pattern matching implementation here. | 
 | 10 | """ | 
 | 11 |  | 
 | 12 | __author__ = "Guido van Rossum <guido@python.org>" | 
 | 13 |  | 
| Benjamin Peterson | 2555d9d | 2008-11-28 22:12:14 +0000 | [diff] [blame] | 14 | import sys | 
 | 15 | from StringIO import StringIO | 
 | 16 |  | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 17 |  | 
 | 18 | HUGE = 0x7FFFFFFF  # maximum repeat count, default max | 
 | 19 |  | 
| Martin v. Löwis | baf267c | 2008-03-22 00:01:12 +0000 | [diff] [blame] | 20 | _type_reprs = {} | 
 | 21 | def type_repr(type_num): | 
 | 22 |     global _type_reprs | 
 | 23 |     if not _type_reprs: | 
 | 24 |         from .pygram import python_symbols | 
 | 25 |         # printing tokens is possible but not as useful | 
 | 26 |         # from .pgen2 import token // token.__dict__.items(): | 
 | 27 |         for name, val in python_symbols.__dict__.items(): | 
 | 28 |             if type(val) == int: _type_reprs[val] = name | 
 | 29 |     return _type_reprs.setdefault(type_num, type_num) | 
 | 30 |  | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 31 |  | 
 | 32 | class Base(object): | 
 | 33 |  | 
 | 34 |     """Abstract base class for Node and Leaf. | 
 | 35 |  | 
 | 36 |     This provides some default functionality and boilerplate using the | 
 | 37 |     template pattern. | 
 | 38 |  | 
 | 39 |     A node may be a subnode of at most one parent. | 
 | 40 |     """ | 
 | 41 |  | 
 | 42 |     # Default values for instance variables | 
 | 43 |     type = None    # int: token number (< 256) or symbol number (>= 256) | 
 | 44 |     parent = None  # Parent node pointer, or None | 
 | 45 |     children = ()  # Tuple of subnodes | 
 | 46 |     was_changed = False | 
 | 47 |  | 
 | 48 |     def __new__(cls, *args, **kwds): | 
 | 49 |         """Constructor that prevents Base from being instantiated.""" | 
 | 50 |         assert cls is not Base, "Cannot instantiate Base" | 
 | 51 |         return object.__new__(cls) | 
 | 52 |  | 
 | 53 |     def __eq__(self, other): | 
 | 54 |         """Compares two nodes for equality. | 
 | 55 |  | 
 | 56 |         This calls the method _eq(). | 
 | 57 |         """ | 
 | 58 |         if self.__class__ is not other.__class__: | 
 | 59 |             return NotImplemented | 
 | 60 |         return self._eq(other) | 
 | 61 |  | 
 | 62 |     def __ne__(self, other): | 
 | 63 |         """Compares two nodes for inequality. | 
 | 64 |  | 
 | 65 |         This calls the method _eq(). | 
 | 66 |         """ | 
 | 67 |         if self.__class__ is not other.__class__: | 
 | 68 |             return NotImplemented | 
 | 69 |         return not self._eq(other) | 
 | 70 |  | 
 | 71 |     def _eq(self, other): | 
 | 72 |         """Compares two nodes for equality. | 
 | 73 |  | 
 | 74 |         This is called by __eq__ and __ne__.  It is only called if the | 
 | 75 |         two nodes have the same type.  This must be implemented by the | 
 | 76 |         concrete subclass.  Nodes should be considered equal if they | 
 | 77 |         have the same structure, ignoring the prefix string and other | 
 | 78 |         context information. | 
 | 79 |         """ | 
 | 80 |         raise NotImplementedError | 
 | 81 |  | 
 | 82 |     def clone(self): | 
 | 83 |         """Returns a cloned (deep) copy of self. | 
 | 84 |  | 
 | 85 |         This must be implemented by the concrete subclass. | 
 | 86 |         """ | 
 | 87 |         raise NotImplementedError | 
 | 88 |  | 
 | 89 |     def post_order(self): | 
 | 90 |         """Returns a post-order iterator for the tree. | 
 | 91 |  | 
 | 92 |         This must be implemented by the concrete subclass. | 
 | 93 |         """ | 
 | 94 |         raise NotImplementedError | 
 | 95 |  | 
 | 96 |     def pre_order(self): | 
 | 97 |         """Returns a pre-order iterator for the tree. | 
 | 98 |  | 
 | 99 |         This must be implemented by the concrete subclass. | 
 | 100 |         """ | 
 | 101 |         raise NotImplementedError | 
 | 102 |  | 
 | 103 |     def set_prefix(self, prefix): | 
 | 104 |         """Sets the prefix for the node (see Leaf class). | 
 | 105 |  | 
 | 106 |         This must be implemented by the concrete subclass. | 
 | 107 |         """ | 
 | 108 |         raise NotImplementedError | 
 | 109 |  | 
 | 110 |     def get_prefix(self): | 
 | 111 |         """Returns the prefix for the node (see Leaf class). | 
 | 112 |  | 
 | 113 |         This must be implemented by the concrete subclass. | 
 | 114 |         """ | 
 | 115 |         raise NotImplementedError | 
 | 116 |  | 
 | 117 |     def replace(self, new): | 
 | 118 |         """Replaces this node with a new one in the parent.""" | 
 | 119 |         assert self.parent is not None, str(self) | 
 | 120 |         assert new is not None | 
 | 121 |         if not isinstance(new, list): | 
 | 122 |             new = [new] | 
 | 123 |         l_children = [] | 
 | 124 |         found = False | 
 | 125 |         for ch in self.parent.children: | 
 | 126 |             if ch is self: | 
 | 127 |                 assert not found, (self.parent.children, self, new) | 
 | 128 |                 if new is not None: | 
 | 129 |                     l_children.extend(new) | 
 | 130 |                 found = True | 
 | 131 |             else: | 
 | 132 |                 l_children.append(ch) | 
 | 133 |         assert found, (self.children, self, new) | 
 | 134 |         self.parent.changed() | 
 | 135 |         self.parent.children = l_children | 
 | 136 |         for x in new: | 
 | 137 |             x.parent = self.parent | 
 | 138 |         self.parent = None | 
 | 139 |  | 
 | 140 |     def get_lineno(self): | 
 | 141 |         """Returns the line number which generated the invocant node.""" | 
 | 142 |         node = self | 
 | 143 |         while not isinstance(node, Leaf): | 
 | 144 |             if not node.children: | 
 | 145 |                 return | 
 | 146 |             node = node.children[0] | 
 | 147 |         return node.lineno | 
 | 148 |  | 
 | 149 |     def changed(self): | 
 | 150 |         if self.parent: | 
 | 151 |             self.parent.changed() | 
 | 152 |         self.was_changed = True | 
 | 153 |  | 
 | 154 |     def remove(self): | 
 | 155 |         """Remove the node from the tree. Returns the position of the node | 
 | 156 |         in its parent's children before it was removed.""" | 
 | 157 |         if self.parent: | 
 | 158 |             for i, node in enumerate(self.parent.children): | 
 | 159 |                 if node is self: | 
 | 160 |                     self.parent.changed() | 
 | 161 |                     del self.parent.children[i] | 
 | 162 |                     self.parent = None | 
 | 163 |                     return i | 
 | 164 |  | 
 | 165 |     def get_next_sibling(self): | 
 | 166 |         """Return the node immediately following the invocant in their | 
 | 167 |         parent's children list. If the invocant does not have a next | 
 | 168 |         sibling, return None.""" | 
 | 169 |         if self.parent is None: | 
 | 170 |             return None | 
 | 171 |  | 
 | 172 |         # Can't use index(); we need to test by identity | 
| Martin v. Löwis | 60a819d | 2008-04-10 02:48:01 +0000 | [diff] [blame] | 173 |         for i, child in enumerate(self.parent.children): | 
 | 174 |             if child is self: | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 175 |                 try: | 
 | 176 |                     return self.parent.children[i+1] | 
 | 177 |                 except IndexError: | 
 | 178 |                     return None | 
 | 179 |  | 
| Martin v. Löwis | 60a819d | 2008-04-10 02:48:01 +0000 | [diff] [blame] | 180 |     def get_prev_sibling(self): | 
 | 181 |         """Return the node immediately preceding the invocant in their | 
 | 182 |         parent's children list. If the invocant does not have a previous | 
 | 183 |         sibling, return None.""" | 
 | 184 |         if self.parent is None: | 
 | 185 |             return None | 
 | 186 |  | 
 | 187 |         # Can't use index(); we need to test by identity | 
 | 188 |         for i, child in enumerate(self.parent.children): | 
 | 189 |             if child is self: | 
 | 190 |                 if i == 0: | 
 | 191 |                     return None | 
 | 192 |                 return self.parent.children[i-1] | 
 | 193 |  | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 194 |     def get_suffix(self): | 
 | 195 |         """Return the string immediately following the invocant node. This | 
 | 196 |         is effectively equivalent to node.get_next_sibling().get_prefix()""" | 
 | 197 |         next_sib = self.get_next_sibling() | 
 | 198 |         if next_sib is None: | 
 | 199 |             return "" | 
 | 200 |         return next_sib.get_prefix() | 
 | 201 |  | 
 | 202 |  | 
 | 203 | class Node(Base): | 
 | 204 |  | 
 | 205 |     """Concrete implementation for interior nodes.""" | 
 | 206 |  | 
 | 207 |     def __init__(self, type, children, context=None, prefix=None): | 
 | 208 |         """Initializer. | 
 | 209 |  | 
 | 210 |         Takes a type constant (a symbol number >= 256), a sequence of | 
 | 211 |         child nodes, and an optional context keyword argument. | 
 | 212 |  | 
 | 213 |         As a side effect, the parent pointers of the children are updated. | 
 | 214 |         """ | 
 | 215 |         assert type >= 256, type | 
 | 216 |         self.type = type | 
 | 217 |         self.children = list(children) | 
 | 218 |         for ch in self.children: | 
 | 219 |             assert ch.parent is None, repr(ch) | 
 | 220 |             ch.parent = self | 
 | 221 |         if prefix is not None: | 
 | 222 |             self.set_prefix(prefix) | 
 | 223 |  | 
 | 224 |     def __repr__(self): | 
 | 225 |         """Returns a canonical string representation.""" | 
| Martin v. Löwis | baf267c | 2008-03-22 00:01:12 +0000 | [diff] [blame] | 226 |         return "%s(%s, %r)" % (self.__class__.__name__, | 
 | 227 |                                type_repr(self.type), | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 228 |                                self.children) | 
 | 229 |  | 
 | 230 |     def __str__(self): | 
 | 231 |         """Returns a pretty string representation. | 
 | 232 |  | 
 | 233 |         This reproduces the input source exactly. | 
 | 234 |         """ | 
 | 235 |         return "".join(map(str, self.children)) | 
 | 236 |  | 
 | 237 |     def _eq(self, other): | 
 | 238 |         """Compares two nodes for equality.""" | 
 | 239 |         return (self.type, self.children) == (other.type, other.children) | 
 | 240 |  | 
 | 241 |     def clone(self): | 
 | 242 |         """Returns a cloned (deep) copy of self.""" | 
 | 243 |         return Node(self.type, [ch.clone() for ch in self.children]) | 
 | 244 |  | 
 | 245 |     def post_order(self): | 
 | 246 |         """Returns a post-order iterator for the tree.""" | 
 | 247 |         for child in self.children: | 
 | 248 |             for node in child.post_order(): | 
 | 249 |                 yield node | 
 | 250 |         yield self | 
 | 251 |  | 
 | 252 |     def pre_order(self): | 
 | 253 |         """Returns a pre-order iterator for the tree.""" | 
 | 254 |         yield self | 
 | 255 |         for child in self.children: | 
 | 256 |             for node in child.post_order(): | 
 | 257 |                 yield node | 
 | 258 |  | 
 | 259 |     def set_prefix(self, prefix): | 
 | 260 |         """Sets the prefix for the node. | 
 | 261 |  | 
 | 262 |         This passes the responsibility on to the first child. | 
 | 263 |         """ | 
 | 264 |         if self.children: | 
 | 265 |             self.children[0].set_prefix(prefix) | 
 | 266 |  | 
 | 267 |     def get_prefix(self): | 
 | 268 |         """Returns the prefix for the node. | 
 | 269 |  | 
 | 270 |         This passes the call on to the first child. | 
 | 271 |         """ | 
 | 272 |         if not self.children: | 
 | 273 |             return "" | 
 | 274 |         return self.children[0].get_prefix() | 
 | 275 |  | 
 | 276 |     def set_child(self, i, child): | 
 | 277 |         """Equivalent to 'node.children[i] = child'. This method also sets the | 
 | 278 |         child's parent attribute appropriately.""" | 
 | 279 |         child.parent = self | 
 | 280 |         self.children[i].parent = None | 
 | 281 |         self.children[i] = child | 
| Benjamin Peterson | 43caaa0 | 2008-12-16 03:35:28 +0000 | [diff] [blame^] | 282 |         self.changed() | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 283 |  | 
 | 284 |     def insert_child(self, i, child): | 
 | 285 |         """Equivalent to 'node.children.insert(i, child)'. This method also | 
 | 286 |         sets the child's parent attribute appropriately.""" | 
 | 287 |         child.parent = self | 
 | 288 |         self.children.insert(i, child) | 
| Benjamin Peterson | 43caaa0 | 2008-12-16 03:35:28 +0000 | [diff] [blame^] | 289 |         self.changed() | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 290 |  | 
 | 291 |     def append_child(self, child): | 
 | 292 |         """Equivalent to 'node.children.append(child)'. This method also | 
 | 293 |         sets the child's parent attribute appropriately.""" | 
 | 294 |         child.parent = self | 
 | 295 |         self.children.append(child) | 
| Benjamin Peterson | 43caaa0 | 2008-12-16 03:35:28 +0000 | [diff] [blame^] | 296 |         self.changed() | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 297 |  | 
 | 298 |  | 
 | 299 | class Leaf(Base): | 
 | 300 |  | 
 | 301 |     """Concrete implementation for leaf nodes.""" | 
 | 302 |  | 
 | 303 |     # Default values for instance variables | 
 | 304 |     prefix = ""  # Whitespace and comments preceding this token in the input | 
 | 305 |     lineno = 0   # Line where this token starts in the input | 
 | 306 |     column = 0   # Column where this token tarts in the input | 
 | 307 |  | 
 | 308 |     def __init__(self, type, value, context=None, prefix=None): | 
 | 309 |         """Initializer. | 
 | 310 |  | 
 | 311 |         Takes a type constant (a token number < 256), a string value, | 
 | 312 |         and an optional context keyword argument. | 
 | 313 |         """ | 
 | 314 |         assert 0 <= type < 256, type | 
 | 315 |         if context is not None: | 
 | 316 |             self.prefix, (self.lineno, self.column) = context | 
 | 317 |         self.type = type | 
 | 318 |         self.value = value | 
 | 319 |         if prefix is not None: | 
 | 320 |             self.prefix = prefix | 
 | 321 |  | 
 | 322 |     def __repr__(self): | 
 | 323 |         """Returns a canonical string representation.""" | 
 | 324 |         return "%s(%r, %r)" % (self.__class__.__name__, | 
 | 325 |                                self.type, | 
 | 326 |                                self.value) | 
 | 327 |  | 
 | 328 |     def __str__(self): | 
 | 329 |         """Returns a pretty string representation. | 
 | 330 |  | 
 | 331 |         This reproduces the input source exactly. | 
 | 332 |         """ | 
 | 333 |         return self.prefix + str(self.value) | 
 | 334 |  | 
 | 335 |     def _eq(self, other): | 
 | 336 |         """Compares two nodes for equality.""" | 
 | 337 |         return (self.type, self.value) == (other.type, other.value) | 
 | 338 |  | 
 | 339 |     def clone(self): | 
 | 340 |         """Returns a cloned (deep) copy of self.""" | 
 | 341 |         return Leaf(self.type, self.value, | 
 | 342 |                     (self.prefix, (self.lineno, self.column))) | 
 | 343 |  | 
 | 344 |     def post_order(self): | 
 | 345 |         """Returns a post-order iterator for the tree.""" | 
 | 346 |         yield self | 
 | 347 |  | 
 | 348 |     def pre_order(self): | 
 | 349 |         """Returns a pre-order iterator for the tree.""" | 
 | 350 |         yield self | 
 | 351 |  | 
 | 352 |     def set_prefix(self, prefix): | 
 | 353 |         """Sets the prefix for the node.""" | 
 | 354 |         self.changed() | 
 | 355 |         self.prefix = prefix | 
 | 356 |  | 
 | 357 |     def get_prefix(self): | 
 | 358 |         """Returns the prefix for the node.""" | 
 | 359 |         return self.prefix | 
 | 360 |  | 
 | 361 |  | 
 | 362 | def convert(gr, raw_node): | 
 | 363 |     """Converts raw node information to a Node or Leaf instance. | 
 | 364 |  | 
 | 365 |     This is passed to the parser driver which calls it whenever a | 
 | 366 |     reduction of a grammar rule produces a new complete node, so that | 
 | 367 |     the tree is build strictly bottom-up. | 
 | 368 |     """ | 
 | 369 |     type, value, context, children = raw_node | 
 | 370 |     if children or type in gr.number2symbol: | 
 | 371 |         # If there's exactly one child, return that child instead of | 
 | 372 |         # creating a new node. | 
 | 373 |         if len(children) == 1: | 
 | 374 |             return children[0] | 
 | 375 |         return Node(type, children, context=context) | 
 | 376 |     else: | 
 | 377 |         return Leaf(type, value, context=context) | 
 | 378 |  | 
 | 379 |  | 
 | 380 | class BasePattern(object): | 
 | 381 |  | 
 | 382 |     """A pattern is a tree matching pattern. | 
 | 383 |  | 
 | 384 |     It looks for a specific node type (token or symbol), and | 
 | 385 |     optionally for a specific content. | 
 | 386 |  | 
 | 387 |     This is an abstract base class.  There are three concrete | 
 | 388 |     subclasses: | 
 | 389 |  | 
 | 390 |     - LeafPattern matches a single leaf node; | 
 | 391 |     - NodePattern matches a single node (usually non-leaf); | 
 | 392 |     - WildcardPattern matches a sequence of nodes of variable length. | 
 | 393 |     """ | 
 | 394 |  | 
 | 395 |     # Defaults for instance variables | 
 | 396 |     type = None     # Node type (token if < 256, symbol if >= 256) | 
 | 397 |     content = None  # Optional content matching pattern | 
 | 398 |     name = None     # Optional name used to store match in results dict | 
 | 399 |  | 
 | 400 |     def __new__(cls, *args, **kwds): | 
 | 401 |         """Constructor that prevents BasePattern from being instantiated.""" | 
 | 402 |         assert cls is not BasePattern, "Cannot instantiate BasePattern" | 
 | 403 |         return object.__new__(cls) | 
 | 404 |  | 
 | 405 |     def __repr__(self): | 
| Martin v. Löwis | baf267c | 2008-03-22 00:01:12 +0000 | [diff] [blame] | 406 |         args = [type_repr(self.type), self.content, self.name] | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 407 |         while args and args[-1] is None: | 
 | 408 |             del args[-1] | 
 | 409 |         return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) | 
 | 410 |  | 
 | 411 |     def optimize(self): | 
 | 412 |         """A subclass can define this as a hook for optimizations. | 
 | 413 |  | 
 | 414 |         Returns either self or another node with the same effect. | 
 | 415 |         """ | 
 | 416 |         return self | 
 | 417 |  | 
 | 418 |     def match(self, node, results=None): | 
 | 419 |         """Does this pattern exactly match a node? | 
 | 420 |  | 
 | 421 |         Returns True if it matches, False if not. | 
 | 422 |  | 
 | 423 |         If results is not None, it must be a dict which will be | 
 | 424 |         updated with the nodes matching named subpatterns. | 
 | 425 |  | 
 | 426 |         Default implementation for non-wildcard patterns. | 
 | 427 |         """ | 
 | 428 |         if self.type is not None and node.type != self.type: | 
 | 429 |             return False | 
 | 430 |         if self.content is not None: | 
 | 431 |             r = None | 
 | 432 |             if results is not None: | 
 | 433 |                 r = {} | 
 | 434 |             if not self._submatch(node, r): | 
 | 435 |                 return False | 
 | 436 |             if r: | 
 | 437 |                 results.update(r) | 
 | 438 |         if results is not None and self.name: | 
 | 439 |             results[self.name] = node | 
 | 440 |         return True | 
 | 441 |  | 
 | 442 |     def match_seq(self, nodes, results=None): | 
 | 443 |         """Does this pattern exactly match a sequence of nodes? | 
 | 444 |  | 
 | 445 |         Default implementation for non-wildcard patterns. | 
 | 446 |         """ | 
 | 447 |         if len(nodes) != 1: | 
 | 448 |             return False | 
 | 449 |         return self.match(nodes[0], results) | 
 | 450 |  | 
 | 451 |     def generate_matches(self, nodes): | 
 | 452 |         """Generator yielding all matches for this pattern. | 
 | 453 |  | 
 | 454 |         Default implementation for non-wildcard patterns. | 
 | 455 |         """ | 
 | 456 |         r = {} | 
 | 457 |         if nodes and self.match(nodes[0], r): | 
 | 458 |             yield 1, r | 
 | 459 |  | 
 | 460 |  | 
 | 461 | class LeafPattern(BasePattern): | 
 | 462 |  | 
 | 463 |     def __init__(self, type=None, content=None, name=None): | 
 | 464 |         """Initializer.  Takes optional type, content, and name. | 
 | 465 |  | 
 | 466 |         The type, if given must be a token type (< 256).  If not given, | 
 | 467 |         this matches any *leaf* node; the content may still be required. | 
 | 468 |  | 
 | 469 |         The content, if given, must be a string. | 
 | 470 |  | 
 | 471 |         If a name is given, the matching node is stored in the results | 
 | 472 |         dict under that key. | 
 | 473 |         """ | 
 | 474 |         if type is not None: | 
 | 475 |             assert 0 <= type < 256, type | 
 | 476 |         if content is not None: | 
 | 477 |             assert isinstance(content, basestring), repr(content) | 
 | 478 |         self.type = type | 
 | 479 |         self.content = content | 
 | 480 |         self.name = name | 
 | 481 |  | 
 | 482 |     def match(self, node, results=None): | 
 | 483 |         """Override match() to insist on a leaf node.""" | 
 | 484 |         if not isinstance(node, Leaf): | 
 | 485 |             return False | 
 | 486 |         return BasePattern.match(self, node, results) | 
 | 487 |  | 
 | 488 |     def _submatch(self, node, results=None): | 
 | 489 |         """Match the pattern's content to the node's children. | 
 | 490 |  | 
 | 491 |         This assumes the node type matches and self.content is not None. | 
 | 492 |  | 
 | 493 |         Returns True if it matches, False if not. | 
 | 494 |  | 
 | 495 |         If results is not None, it must be a dict which will be | 
 | 496 |         updated with the nodes matching named subpatterns. | 
 | 497 |  | 
 | 498 |         When returning False, the results dict may still be updated. | 
 | 499 |         """ | 
 | 500 |         return self.content == node.value | 
 | 501 |  | 
 | 502 |  | 
 | 503 | class NodePattern(BasePattern): | 
 | 504 |  | 
 | 505 |     wildcards = False | 
 | 506 |  | 
 | 507 |     def __init__(self, type=None, content=None, name=None): | 
 | 508 |         """Initializer.  Takes optional type, content, and name. | 
 | 509 |  | 
 | 510 |         The type, if given, must be a symbol type (>= 256).  If the | 
 | 511 |         type is None this matches *any* single node (leaf or not), | 
 | 512 |         except if content is not None, in which it only matches | 
 | 513 |         non-leaf nodes that also match the content pattern. | 
 | 514 |  | 
 | 515 |         The content, if not None, must be a sequence of Patterns that | 
 | 516 |         must match the node's children exactly.  If the content is | 
 | 517 |         given, the type must not be None. | 
 | 518 |  | 
 | 519 |         If a name is given, the matching node is stored in the results | 
 | 520 |         dict under that key. | 
 | 521 |         """ | 
 | 522 |         if type is not None: | 
 | 523 |             assert type >= 256, type | 
 | 524 |         if content is not None: | 
 | 525 |             assert not isinstance(content, basestring), repr(content) | 
 | 526 |             content = list(content) | 
 | 527 |             for i, item in enumerate(content): | 
 | 528 |                 assert isinstance(item, BasePattern), (i, item) | 
 | 529 |                 if isinstance(item, WildcardPattern): | 
 | 530 |                     self.wildcards = True | 
 | 531 |         self.type = type | 
 | 532 |         self.content = content | 
 | 533 |         self.name = name | 
 | 534 |  | 
 | 535 |     def _submatch(self, node, results=None): | 
 | 536 |         """Match the pattern's content to the node's children. | 
 | 537 |  | 
 | 538 |         This assumes the node type matches and self.content is not None. | 
 | 539 |  | 
 | 540 |         Returns True if it matches, False if not. | 
 | 541 |  | 
 | 542 |         If results is not None, it must be a dict which will be | 
 | 543 |         updated with the nodes matching named subpatterns. | 
 | 544 |  | 
 | 545 |         When returning False, the results dict may still be updated. | 
 | 546 |         """ | 
 | 547 |         if self.wildcards: | 
 | 548 |             for c, r in generate_matches(self.content, node.children): | 
 | 549 |                 if c == len(node.children): | 
 | 550 |                     if results is not None: | 
 | 551 |                         results.update(r) | 
 | 552 |                     return True | 
 | 553 |             return False | 
 | 554 |         if len(self.content) != len(node.children): | 
 | 555 |             return False | 
 | 556 |         for subpattern, child in zip(self.content, node.children): | 
 | 557 |             if not subpattern.match(child, results): | 
 | 558 |                 return False | 
 | 559 |         return True | 
 | 560 |  | 
 | 561 |  | 
 | 562 | class WildcardPattern(BasePattern): | 
 | 563 |  | 
 | 564 |     """A wildcard pattern can match zero or more nodes. | 
 | 565 |  | 
 | 566 |     This has all the flexibility needed to implement patterns like: | 
 | 567 |  | 
 | 568 |     .*      .+      .?      .{m,n} | 
 | 569 |     (a b c | d e | f) | 
 | 570 |     (...)*  (...)+  (...)?  (...){m,n} | 
 | 571 |  | 
 | 572 |     except it always uses non-greedy matching. | 
 | 573 |     """ | 
 | 574 |  | 
 | 575 |     def __init__(self, content=None, min=0, max=HUGE, name=None): | 
 | 576 |         """Initializer. | 
 | 577 |  | 
 | 578 |         Args: | 
 | 579 |             content: optional sequence of subsequences of patterns; | 
 | 580 |                      if absent, matches one node; | 
 | 581 |                      if present, each subsequence is an alternative [*] | 
 | 582 |             min: optinal minumum number of times to match, default 0 | 
 | 583 |             max: optional maximum number of times tro match, default HUGE | 
 | 584 |             name: optional name assigned to this match | 
 | 585 |  | 
 | 586 |         [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is | 
 | 587 |             equivalent to (a b c | d e | f g h); if content is None, | 
 | 588 |             this is equivalent to '.' in regular expression terms. | 
 | 589 |             The min and max parameters work as follows: | 
 | 590 |                 min=0, max=maxint: .* | 
 | 591 |                 min=1, max=maxint: .+ | 
 | 592 |                 min=0, max=1: .? | 
 | 593 |                 min=1, max=1: . | 
 | 594 |             If content is not None, replace the dot with the parenthesized | 
 | 595 |             list of alternatives, e.g. (a b c | d e | f g h)* | 
 | 596 |         """ | 
 | 597 |         assert 0 <= min <= max <= HUGE, (min, max) | 
 | 598 |         if content is not None: | 
 | 599 |             content = tuple(map(tuple, content))  # Protect against alterations | 
 | 600 |             # Check sanity of alternatives | 
 | 601 |             assert len(content), repr(content)  # Can't have zero alternatives | 
 | 602 |             for alt in content: | 
 | 603 |                 assert len(alt), repr(alt) # Can have empty alternatives | 
 | 604 |         self.content = content | 
 | 605 |         self.min = min | 
 | 606 |         self.max = max | 
 | 607 |         self.name = name | 
 | 608 |  | 
 | 609 |     def optimize(self): | 
 | 610 |         """Optimize certain stacked wildcard patterns.""" | 
 | 611 |         subpattern = None | 
 | 612 |         if (self.content is not None and | 
 | 613 |             len(self.content) == 1 and len(self.content[0]) == 1): | 
 | 614 |             subpattern = self.content[0][0] | 
 | 615 |         if self.min == 1 and self.max == 1: | 
 | 616 |             if self.content is None: | 
 | 617 |                 return NodePattern(name=self.name) | 
 | 618 |             if subpattern is not None and  self.name == subpattern.name: | 
 | 619 |                 return subpattern.optimize() | 
 | 620 |         if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and | 
 | 621 |             subpattern.min <= 1 and self.name == subpattern.name): | 
 | 622 |             return WildcardPattern(subpattern.content, | 
 | 623 |                                    self.min*subpattern.min, | 
 | 624 |                                    self.max*subpattern.max, | 
 | 625 |                                    subpattern.name) | 
 | 626 |         return self | 
 | 627 |  | 
 | 628 |     def match(self, node, results=None): | 
 | 629 |         """Does this pattern exactly match a node?""" | 
 | 630 |         return self.match_seq([node], results) | 
 | 631 |  | 
 | 632 |     def match_seq(self, nodes, results=None): | 
 | 633 |         """Does this pattern exactly match a sequence of nodes?""" | 
 | 634 |         for c, r in self.generate_matches(nodes): | 
 | 635 |             if c == len(nodes): | 
 | 636 |                 if results is not None: | 
 | 637 |                     results.update(r) | 
 | 638 |                     if self.name: | 
 | 639 |                         results[self.name] = list(nodes) | 
 | 640 |                 return True | 
 | 641 |         return False | 
 | 642 |  | 
 | 643 |     def generate_matches(self, nodes): | 
 | 644 |         """Generator yielding matches for a sequence of nodes. | 
 | 645 |  | 
 | 646 |         Args: | 
 | 647 |             nodes: sequence of nodes | 
 | 648 |  | 
 | 649 |         Yields: | 
 | 650 |             (count, results) tuples where: | 
 | 651 |             count: the match comprises nodes[:count]; | 
 | 652 |             results: dict containing named submatches. | 
 | 653 |         """ | 
 | 654 |         if self.content is None: | 
 | 655 |             # Shortcut for special case (see __init__.__doc__) | 
 | 656 |             for count in xrange(self.min, 1 + min(len(nodes), self.max)): | 
 | 657 |                 r = {} | 
 | 658 |                 if self.name: | 
 | 659 |                     r[self.name] = nodes[:count] | 
 | 660 |                 yield count, r | 
| Benjamin Peterson | 1ab3149 | 2008-07-17 02:07:46 +0000 | [diff] [blame] | 661 |         elif self.name == "bare_name": | 
 | 662 |             yield self._bare_name_matches(nodes) | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 663 |         else: | 
| Benjamin Peterson | 2555d9d | 2008-11-28 22:12:14 +0000 | [diff] [blame] | 664 |             # The reason for this is that hitting the recursion limit usually | 
 | 665 |             # results in some ugly messages about how RuntimeErrors are being | 
 | 666 |             # ignored. | 
 | 667 |             save_stderr = sys.stderr | 
 | 668 |             sys.stderr = StringIO() | 
| Benjamin Peterson | 4d16415 | 2008-10-04 20:55:50 +0000 | [diff] [blame] | 669 |             try: | 
 | 670 |                 for count, r in self._recursive_matches(nodes, 0): | 
 | 671 |                     if self.name: | 
 | 672 |                         r[self.name] = nodes[:count] | 
 | 673 |                     yield count, r | 
 | 674 |             except RuntimeError: | 
 | 675 |                 # We fall back to the iterative pattern matching scheme if the recursive | 
 | 676 |                 # scheme hits the recursion limit. | 
 | 677 |                 for count, r in self._iterative_matches(nodes): | 
 | 678 |                     if self.name: | 
 | 679 |                         r[self.name] = nodes[:count] | 
 | 680 |                     yield count, r | 
| Benjamin Peterson | 2555d9d | 2008-11-28 22:12:14 +0000 | [diff] [blame] | 681 |             finally: | 
 | 682 |                 sys.stderr = save_stderr | 
| Benjamin Peterson | 4d16415 | 2008-10-04 20:55:50 +0000 | [diff] [blame] | 683 |  | 
 | 684 |     def _iterative_matches(self, nodes): | 
 | 685 |         """Helper to iteratively yield the matches.""" | 
 | 686 |         nodelen = len(nodes) | 
 | 687 |         if 0 >= self.min: | 
 | 688 |             yield 0, {} | 
 | 689 |  | 
 | 690 |         results = [] | 
 | 691 |         # generate matches that use just one alt from self.content | 
 | 692 |         for alt in self.content: | 
 | 693 |             for c, r in generate_matches(alt, nodes): | 
 | 694 |                 yield c, r | 
 | 695 |                 results.append((c, r)) | 
 | 696 |  | 
 | 697 |         # for each match, iterate down the nodes | 
 | 698 |         while results: | 
 | 699 |             new_results = [] | 
 | 700 |             for c0, r0 in results: | 
 | 701 |                 # stop if the entire set of nodes has been matched | 
 | 702 |                 if c0 < nodelen and c0 <= self.max: | 
 | 703 |                     for alt in self.content: | 
 | 704 |                         for c1, r1 in generate_matches(alt, nodes[c0:]): | 
 | 705 |                             if c1 > 0: | 
 | 706 |                                 r = {} | 
 | 707 |                                 r.update(r0) | 
 | 708 |                                 r.update(r1) | 
 | 709 |                                 yield c0 + c1, r | 
 | 710 |                                 new_results.append((c0 + c1, r)) | 
 | 711 |             results = new_results | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 712 |  | 
| Benjamin Peterson | 1ab3149 | 2008-07-17 02:07:46 +0000 | [diff] [blame] | 713 |     def _bare_name_matches(self, nodes): | 
 | 714 |         """Special optimized matcher for bare_name.""" | 
 | 715 |         count = 0 | 
 | 716 |         r = {} | 
 | 717 |         done = False | 
 | 718 |         max = len(nodes) | 
 | 719 |         while not done and count < max: | 
 | 720 |             done = True | 
 | 721 |             for leaf in self.content: | 
 | 722 |                 if leaf[0].match(nodes[count], r): | 
 | 723 |                     count += 1 | 
 | 724 |                     done = False | 
 | 725 |                     break | 
 | 726 |         r[self.name] = nodes[:count] | 
 | 727 |         return count, r | 
 | 728 |  | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 729 |     def _recursive_matches(self, nodes, count): | 
 | 730 |         """Helper to recursively yield the matches.""" | 
 | 731 |         assert self.content is not None | 
 | 732 |         if count >= self.min: | 
| Benjamin Peterson | 1ab3149 | 2008-07-17 02:07:46 +0000 | [diff] [blame] | 733 |             yield 0, {} | 
| Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 734 |         if count < self.max: | 
 | 735 |             for alt in self.content: | 
 | 736 |                 for c0, r0 in generate_matches(alt, nodes): | 
 | 737 |                     for c1, r1 in self._recursive_matches(nodes[c0:], count+1): | 
 | 738 |                         r = {} | 
 | 739 |                         r.update(r0) | 
 | 740 |                         r.update(r1) | 
 | 741 |                         yield c0 + c1, r | 
 | 742 |  | 
 | 743 |  | 
 | 744 | class NegatedPattern(BasePattern): | 
 | 745 |  | 
 | 746 |     def __init__(self, content=None): | 
 | 747 |         """Initializer. | 
 | 748 |  | 
 | 749 |         The argument is either a pattern or None.  If it is None, this | 
 | 750 |         only matches an empty sequence (effectively '$' in regex | 
 | 751 |         lingo).  If it is not None, this matches whenever the argument | 
 | 752 |         pattern doesn't have any matches. | 
 | 753 |         """ | 
 | 754 |         if content is not None: | 
 | 755 |             assert isinstance(content, BasePattern), repr(content) | 
 | 756 |         self.content = content | 
 | 757 |  | 
 | 758 |     def match(self, node): | 
 | 759 |         # We never match a node in its entirety | 
 | 760 |         return False | 
 | 761 |  | 
 | 762 |     def match_seq(self, nodes): | 
 | 763 |         # We only match an empty sequence of nodes in its entirety | 
 | 764 |         return len(nodes) == 0 | 
 | 765 |  | 
 | 766 |     def generate_matches(self, nodes): | 
 | 767 |         if self.content is None: | 
 | 768 |             # Return a match if there is an empty sequence | 
 | 769 |             if len(nodes) == 0: | 
 | 770 |                 yield 0, {} | 
 | 771 |         else: | 
 | 772 |             # Return a match if the argument pattern has no matches | 
 | 773 |             for c, r in self.content.generate_matches(nodes): | 
 | 774 |                 return | 
 | 775 |             yield 0, {} | 
 | 776 |  | 
 | 777 |  | 
 | 778 | def generate_matches(patterns, nodes): | 
 | 779 |     """Generator yielding matches for a sequence of patterns and nodes. | 
 | 780 |  | 
 | 781 |     Args: | 
 | 782 |         patterns: a sequence of patterns | 
 | 783 |         nodes: a sequence of nodes | 
 | 784 |  | 
 | 785 |     Yields: | 
 | 786 |         (count, results) tuples where: | 
 | 787 |         count: the entire sequence of patterns matches nodes[:count]; | 
 | 788 |         results: dict containing named submatches. | 
 | 789 |         """ | 
 | 790 |     if not patterns: | 
 | 791 |         yield 0, {} | 
 | 792 |     else: | 
 | 793 |         p, rest = patterns[0], patterns[1:] | 
 | 794 |         for c0, r0 in p.generate_matches(nodes): | 
 | 795 |             if not rest: | 
 | 796 |                 yield c0, r0 | 
 | 797 |             else: | 
 | 798 |                 for c1, r1 in generate_matches(rest, nodes[c0:]): | 
 | 799 |                     r = {} | 
 | 800 |                     r.update(r0) | 
 | 801 |                     r.update(r1) | 
 | 802 |                     yield c0 + c1, r |