Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 1 | """Utility functions, node construction macros, etc.""" |
| 2 | # Author: Collin Winter |
| 3 | |
| 4 | # Local imports |
Benjamin Peterson | e607823 | 2008-06-15 02:31:05 +0000 | [diff] [blame] | 5 | from .pgen2 import token |
| 6 | from .pytree import Leaf, Node |
| 7 | from .pygram import python_symbols as syms |
| 8 | from . import patcomp |
Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 9 | |
| 10 | |
| 11 | ########################################################### |
| 12 | ### Common node-construction "macros" |
| 13 | ########################################################### |
| 14 | |
def KeywordArg(keyword, value):
    """Build an ``argument`` node of the form ``keyword=value``.

    ``keyword`` and ``value`` are used as given and are separated by a newly
    created ``=`` leaf.
    """
    equals = Leaf(token.EQUAL, '=')
    return Node(syms.argument, [keyword, equals, value])
| 18 | |
def LParen():
    """Return a new leaf for an opening parenthesis."""
    paren = Leaf(token.LPAR, "(")
    return paren
| 21 | |
def RParen():
    """Return a new leaf for a closing parenthesis."""
    paren = Leaf(token.RPAR, ")")
    return paren
| 24 | |
def Assign(target, source):
    """Build a node for the assignment ``target = source``.

    Either argument may be a single node or a list of nodes.  A single
    ``source`` node has its prefix set to one space (the node passed in is
    modified); a ``source`` list is used exactly as given.
    """
    targets = target if isinstance(target, list) else [target]
    if isinstance(source, list):
        sources = source
    else:
        # Separate the value from the '=' sign with a single space.
        source.set_prefix(" ")
        sources = [source]

    equals = Leaf(token.EQUAL, "=", prefix=" ")
    return Node(syms.atom, targets + [equals] + sources)
| 35 | |
def Name(name, prefix=None):
    """Create a NAME leaf whose text is ``name``, with an optional prefix."""
    leaf = Leaf(token.NAME, name, prefix=prefix)
    return leaf
| 39 | |
def Attr(obj, attr):
    """Return the two-element list of nodes that represents ``obj.attr``.

    The result is ``[obj, trailer]`` where ``trailer`` is a trailer node
    holding a ``.`` leaf followed by ``attr``.
    """
    trailer = Node(syms.trailer, [Dot(), attr])
    return [obj, trailer]
| 43 | |
def Comma():
    """Return a new leaf for a comma (,)."""
    comma = Leaf(token.COMMA, ",")
    return comma
| 47 | |
def Dot():
    """Return a new leaf for a period (.)."""
    dot = Leaf(token.DOT, ".")
    return dot
| 51 | |
def ArgList(args, lparen=LParen(), rparen=RParen()):
    """Build the parenthesised argument-list trailer used by Call().

    ``lparen`` and ``rparen`` are cloned before use, so the objects passed in
    (including the defaults, which are created once when the function is
    defined) are never placed into the new tree themselves.  When ``args``
    is truthy it is wrapped in an ``arglist`` node and inserted between the
    two parentheses.
    """
    opening = lparen.clone()
    closing = rparen.clone()
    trailer = Node(syms.trailer, [opening, closing])
    if not args:
        return trailer
    trailer.insert_child(1, Node(syms.arglist, args))
    return trailer
Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 58 | |
def Call(func_name, args=None, prefix=None):
    """Build a ``power`` node representing the call ``func_name(args)``.

    ``args`` is handed to ArgList().  The prefix of the resulting node is
    only touched when ``prefix`` is not None.
    """
    call = Node(syms.power, [func_name, ArgList(args)])
    if prefix is None:
        return call
    call.set_prefix(prefix)
    return call
| 65 | |
def Newline():
    """Return a NEWLINE leaf whose text is a newline character."""
    leaf = Leaf(token.NEWLINE, "\n")
    return leaf
| 69 | |
def BlankLine():
    """Return a NEWLINE leaf whose text is the empty string."""
    leaf = Leaf(token.NEWLINE, "")
    return leaf
| 73 | |
def Number(n, prefix=None):
    """Create a NUMBER leaf whose text is ``n``, with an optional prefix."""
    leaf = Leaf(token.NUMBER, n, prefix=prefix)
    return leaf
| 76 | |
def Subscript(index_node):
    """Build a trailer node for the subscript ``[index_node]``."""
    # NOTE(review): the bracket leaves carry the LBRACE/RBRACE token types
    # even though their text is "[" and "]"; presumably LSQB/RSQB were meant.
    # Left unchanged here -- confirm nothing depends on it before altering.
    opening = Leaf(token.LBRACE, '[')
    closing = Leaf(token.RBRACE, ']')
    return Node(syms.trailer, [opening, index_node, closing])
| 82 | |
def String(string, prefix=None):
    """Create a STRING leaf whose text is ``string``, with an optional prefix."""
    leaf = Leaf(token.STRING, string, prefix=prefix)
    return leaf
| 86 | |
def ListComp(xp, fp, it, test=None):
    """Build a list comprehension ``[xp for fp in it if test]``.

    The ``if test`` clause is left out when ``test`` is falsy (the default
    is None).  The prefixes of the nodes passed in are overwritten: ``xp``
    gets an empty prefix, while ``fp``, ``it`` and ``test`` get one space.
    """
    for piece, spacing in ((xp, ""), (fp, " "), (it, " ")):
        piece.set_prefix(spacing)

    def keyword(text):
        # A NAME leaf for a keyword, preceded by a single space.
        leaf = Leaf(token.NAME, text)
        leaf.set_prefix(" ")
        return leaf

    comp_for_children = [keyword("for"), fp, keyword("in"), it]
    if test:
        test.set_prefix(" ")
        comp_for_children.append(Node(syms.comp_if, [keyword("if"), test]))

    body = Node(syms.listmaker, [xp, Node(syms.comp_for, comp_for_children)])
    # NOTE(review): the bracket leaves use the LBRACE/RBRACE token types even
    # though their text is "[" and "]" -- kept as is; confirm before changing.
    opening = Leaf(token.LBRACE, "[")
    closing = Leaf(token.RBRACE, "]")
    return Node(syms.atom, [opening, body, closing])
| 110 | |
def FromImport(package_name, name_leafs):
    """Build the statement ``from package_name import name_leafs``.

    Every leaf in ``name_leafs`` is first detached from the tree it is
    currently in (via ``remove()``) and then placed under a new
    ``import_as_names`` node.
    """
    # XXX: May not handle dotted imports properly (eg, package_name='foo.bar').
    # An assertion rejecting dotted package names used to sit here but is
    # disabled; dotted names are untested -- use at your own peril.

    # Pull the leaves out of their old tree
    for leaf in name_leafs:
        leaf.remove()

    from_kw = Leaf(token.NAME, 'from')
    package = Leaf(token.NAME, package_name, prefix=" ")
    import_kw = Leaf(token.NAME, 'import', prefix=" ")
    names = Node(syms.import_as_names, name_leafs)
    return Node(syms.import_from, [from_kw, package, import_kw, names])
| 129 | |
| 130 | |
Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 131 | ########################################################### |
| 132 | ### Determine whether a node represents a given literal |
| 133 | ########################################################### |
| 134 | |
def is_tuple(node):
    """Return True if ``node`` looks like a tuple literal.

    That is either a Node whose children equal ``[LParen(), RParen()]`` (the
    empty tuple), or a Node with exactly three children: a ``(`` leaf, an
    inner Node, and a ``)`` leaf.
    """
    if not isinstance(node, Node):
        return False
    kids = node.children
    if kids == [LParen(), RParen()]:
        return True
    if len(kids) != 3:
        return False
    opener, middle, closer = kids
    return (isinstance(opener, Leaf)
            and isinstance(middle, Node)
            and isinstance(closer, Leaf)
            and opener.value == "("
            and closer.value == ")")
| 146 | |
def is_list(node):
    """Return True if ``node`` looks like a list literal.

    The node must be a Node with at least two children whose first and last
    children are the leaves ``[`` and ``]``.
    """
    if not isinstance(node, Node):
        return False
    kids = node.children
    if len(kids) <= 1:
        return False
    first, last = kids[0], kids[-1]
    return (isinstance(first, Leaf)
            and isinstance(last, Leaf)
            and first.value == "["
            and last.value == "]")
| 155 | |
Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 156 | |
| 157 | ########################################################### |
| 158 | ### Misc |
| 159 | ########################################################### |
| 160 | |
def parenthesize(node):
    """Wrap ``node`` in parentheses, returning a new ``atom`` node."""
    wrapped = [LParen(), node, RParen()]
    return Node(syms.atom, wrapped)
| 163 | |
Martin v. Löwis | 60a819d | 2008-04-10 02:48:01 +0000 | [diff] [blame] | 164 | |
# Names of callables, kept as a set of strings.  Presumably these are the
# builtins that consume an iterable argument completely; the set is not
# referenced by the other definitions visible in this part of the module.
consuming_calls = set([
    "sorted",
    "list",
    "set",
    "any",
    "all",
    "tuple",
    "sum",
    "min",
    "max",
])
| 167 | |
def attr_chain(obj, attr):
    """Walk a chain of objects linked through one attribute.

    Given objects where a.foo -> b, b.foo -> c, and so on, this generator
    yields b, c, ... in order.  The starting object itself is not yielded.
    The walk stops as soon as ``getattr(x, attr)`` produces a falsy value
    (typically None).

    Args:
        obj: the starting object
        attr: the name of the chaining attribute

    Yields:
        Each successive object in the chain.
    """
    current = obj
    while True:
        current = getattr(current, attr)
        if not current:
            return
        yield current
| 186 | |
# Pattern sources used by in_special_context().  They start out as pattern
# strings and are rebound to their compiled forms the first time
# in_special_context() runs.  Each pattern captures the node of interest
# under the name "node".

# A 'for' statement or a comprehension 'for' clause; "node" is the item that
# follows 'in'.
p0 = """for_stmt< 'for' any 'in' node=any ':' any* >
| comp_for< 'for' any 'in' node=any any* >
"""
# A call to one of the listed names, or to a '.join' attribute, where "node"
# is the only thing between the parentheses.
p1 = """
power<
( 'iter' | 'list' | 'tuple' | 'sorted' | 'set' | 'sum' |
'any' | 'all' | (any* trailer< '.' 'join' >) )
trailer< '(' node=any ')' >
any*
>
"""
# A call to 'sorted' with an argument list; "node" is its first child.
p2 = """
power<
'sorted'
trailer< '(' arglist<node=any any*> ')' >
any*
>
"""
# Becomes True once p0, p1 and p2 have been compiled.
pats_built = False
def in_special_context(node):
    """Return True if node only needs to be iterable where it is used.

    In other words, it does not matter whether node evaluates to a list or
    to an iterator.  The module-level patterns p0, p1 and p2 are compiled on
    the first call.  They are then tried pairwise against node's ancestors:
    p0 against the parent, p1 against the grandparent and p2 against the
    great-grandparent (fewer if the chain of parents is shorter).  A match
    counts only when the pattern's captured "node" is this very node.
    See test_map_nochange in test_fixers.py for some examples and tests.
    """
    global p0, p1, p2, pats_built
    if not pats_built:
        # One-time, in-place replacement of the pattern strings.
        p1 = patcomp.compile_pattern(p1)
        p0 = patcomp.compile_pattern(p0)
        p2 = patcomp.compile_pattern(p2)
        pats_built = True

    for pattern, ancestor in zip((p0, p1, p2), attr_chain(node, "parent")):
        captured = {}
        if not pattern.match(ancestor, captured):
            continue
        if captured["node"] is node:
            return True
    return False
| 224 | |
def is_probably_builtin(node):
    """Return False when node is clearly not a reference to a builtin.

    That covers a name that follows a dot, a name directly under a function
    or class definition, the first child of an expression statement, and
    parameter names.  Anything else yields True.
    """
    before = node.prev_sibling
    if before is not None and before.type == token.DOT:
        # Attribute lookup.
        return False

    owner = node.parent
    kind = owner.type
    if kind in (syms.funcdef, syms.classdef):
        return False
    if kind == syms.expr_stmt and owner.children[0] is node:
        # Assignment.
        return False
    if kind == syms.parameters:
        # The name of an argument.
        return False
    if kind == syms.typedargslist:
        follows_comma = before is not None and before.type == token.COMMA
        if follows_comma or owner.children[0] is node:
            # The name of an argument.
            return False
    return True
| 247 | |
Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 248 | ########################################################### |
| 249 | ### The following functions are to find bindings in a suite |
| 250 | ########################################################### |
| 251 | |
def make_suite(node):
    """Return node as a ``suite`` node, wrapping it when necessary.

    A node that already is a suite is returned unchanged.  Otherwise a clone
    of node is placed inside a new suite node; the new suite is given
    whatever parent the clone carried, and the clone's own parent is reset
    before it is wrapped.  The node passed in is not modified.
    """
    if node.type == syms.suite:
        return node
    copy = node.clone()
    inherited_parent = copy.parent
    copy.parent = None
    wrapper = Node(syms.suite, [copy])
    wrapper.parent = inherited_parent
    return wrapper
| 260 | |
def find_root(node):
    """Return the enclosing ``file_input`` node (the top level namespace).

    Follows ``parent`` links upwards starting at node.  An AssertionError is
    raised if the links run out before a ``file_input`` node is reached.
    """
    current = node
    while current.type != syms.file_input:
        parent = current.parent
        assert parent, "Tree is insane! root found before "\
                       "file_input node was found."
        current = parent
    return current
Martin v. Löwis | 5e37bae | 2008-03-19 04:43:46 +0000 | [diff] [blame] | 269 | |
def does_tree_import(package, name, node):
    """Return True if name is imported from package at the top level
    of the tree that node belongs to.

    For a plain ``import foo``, pass None as the package and 'foo' as
    the name.
    """
    root = find_root(node)
    return bool(find_binding(name, root, package))
| 277 | |
def is_import(node):
    """Return True if node is an ``import`` or ``from ... import`` statement."""
    kind = node.type
    return kind == syms.import_name or kind == syms.import_from
| 281 | |
def touch_import(package, name, node):
    """Make sure name is imported at the top level of node's tree.

    Works like `does_tree_import`, but adds an import statement when the
    name is not imported yet.  With ``package`` set to None the statement is
    ``import name``; otherwise it is ``from package import name``.

    The statement is inserted after the first run of consecutive import
    statements at the top level.  When that gives position 0, it goes right
    after the first top-level string statement (the docstring) if there is
    one, and otherwise at the very beginning of the file.

    Returns None; the tree is modified in place.
    """
    def is_import_stmt(stmt):
        return stmt.type == syms.simple_stmt and stmt.children and \
               is_import(stmt.children[0])

    root = find_root(node)

    if does_tree_import(package, name, root):
        return

    # figure out where to insert the new import.  First try to find
    # the first import and then skip to the last one.
    # NOTE: when the run of imports reaches the end of root.children the
    # inner loop never breaks, so offset is the index of the last import in
    # the run and the new statement is placed just before that import.
    insert_pos = offset = 0
    for idx, child in enumerate(root.children):
        if not is_import_stmt(child):
            continue
        for offset, following in enumerate(root.children[idx:]):
            if not is_import_stmt(following):
                break
        insert_pos = idx + offset
        break

    # if there are no imports where we can insert, find the docstring.
    # if that also fails, we stick to the beginning of the file
    if insert_pos == 0:
        for idx, child in enumerate(root.children):
            if child.type == syms.simple_stmt and child.children and \
               child.children[0].type == token.STRING:
                insert_pos = idx + 1
                break

    if package is None:
        import_ = Node(syms.import_name, [
            Leaf(token.NAME, 'import'),
            Leaf(token.NAME, name, prefix=' ')
        ])
    else:
        import_ = FromImport(package, [Leaf(token.NAME, name, prefix=' ')])

    children = [import_, Newline()]
    root.insert_child(insert_pos, Node(syms.simple_stmt, children))
| 330 | |
| 331 | |
_def_syms = set([syms.classdef, syms.funcdef])
def find_binding(name, node, package=None):
    """Return the node that binds the variable name, or None.

    The children of node are examined in order; loop, conditional and try
    bodies are searched recursively.  When the optional package argument is
    given, only import statements are returned -- with one exception: a
    'for' statement whose target contains name is returned regardless.
    See test cases for examples.
    """
    for child in node.children:
        kind = child.type
        found = None
        if kind == syms.for_stmt:
            if _find(name, child.children[1]):
                return child
            found = find_binding(
                name, make_suite(child.children[-1]), package) or None
        elif kind in (syms.if_stmt, syms.while_stmt):
            found = find_binding(
                name, make_suite(child.children[-1]), package) or None
        elif kind == syms.try_stmt:
            in_try = find_binding(name, make_suite(child.children[2]), package)
            if in_try:
                found = in_try
            else:
                # Look through every later clause; a later hit replaces an
                # earlier one.
                for i, kid in enumerate(child.children[3:]):
                    if kid.type != token.COLON or kid.value != ":":
                        continue
                    # i+3 is the colon, i+4 is the suite
                    in_clause = find_binding(
                        name, make_suite(child.children[i+4]), package)
                    if in_clause:
                        found = in_clause
        elif kind in _def_syms and child.children[1].value == name:
            found = child
        elif _is_import_binding(child, name, package):
            found = child
        elif kind == syms.simple_stmt:
            found = find_binding(name, child, package)
        elif kind == syms.expr_stmt:
            if _find(name, child.children[0]):
                found = child

        if found and (not package or is_import(found)):
            return found
    return None
| 374 | |
_block_syms = set([syms.funcdef, syms.classdef, syms.trailer])
def _find(name, node):
    """Return a NAME leaf with value name found in or below node, else None.

    The search is iterative and uses an explicit stack.  It descends into
    nodes whose type is above 256 (presumably the non-terminal symbols --
    confirm against the grammar's numbering) unless the type is listed in
    _block_syms; function definitions, class definitions and trailers are
    therefore not entered.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        kind = current.type
        if kind > 256 and kind not in _block_syms:
            pending.extend(current.children)
            continue
        if kind == token.NAME and current.value == name:
            return current
    return None
| 385 | |
def _is_import_binding(node, name, package=None):
    """Return node if node imports name, or if node does
    ``from package import *``.  None is returned otherwise.
    See test cases for examples.
    """
    kind = node.type

    if kind == syms.import_name and not package:
        imported = node.children[1]
        if imported.type == syms.dotted_as_names:
            for entry in imported.children:
                if entry.type == syms.dotted_as_name:
                    if entry.children[2].value == name:
                        return node
                elif entry.type == token.NAME and entry.value == name:
                    return node
            return None
        if imported.type == syms.dotted_as_name:
            alias = imported.children[-1]
            if alias.type == token.NAME and alias.value == name:
                return node
            return None
        if imported.type == token.NAME and imported.value == name:
            return node
        return None

    if kind != syms.import_from:
        return None

    # unicode(...) is used to make life easier here, because
    # from a.b import parses to ['import', ['a', '.', 'b'], ...]
    if package and unicode(node.children[1]).strip() != package:
        return None
    names = node.children[3]
    if package and _find('as', names):
        # See test_from_import_as for explanation
        return None
    if names.type == syms.import_as_names and _find(name, names):
        return node
    if names.type == syms.import_as_name:
        alias = names.children[2]
        if alias.type == token.NAME and alias.value == name:
            return node
        return None
    if names.type == token.NAME and names.value == name:
        return node
    if package and names.type == token.STAR:
        return node
    return None