Merged revisions 83852-83853,83857,84042,84216,84274-84276,84375,85388,85478,85506-85508 via svnmerge from
svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3

........
  r83852 | benjamin.peterson | 2010-08-08 15:45:44 -0500 (Sun, 08 Aug 2010) | 1 line

  wrap with parens
........
  r83853 | benjamin.peterson | 2010-08-08 15:46:31 -0500 (Sun, 08 Aug 2010) | 1 line

  use parens
........
  r83857 | benjamin.peterson | 2010-08-08 15:59:49 -0500 (Sun, 08 Aug 2010) | 1 line

  things which use touch_import should be pre order
........
  r84042 | george.boutsioukis | 2010-08-14 16:10:19 -0500 (Sat, 14 Aug 2010) | 2 lines

  This revision incorporates into the 2to3 tool the new, faster, tree matching algorithm developed during a GSOC project. The algorithm resides in the two added modules, btm_matcher and btm_utils. New code has been added to drive the new matching process in refactor.py and a few minor changes were made in other modules. A BM_compatible flag(False by default) has been added in fixer_base and it is set to True in most of the current fixers.
........
  r84216 | benjamin.peterson | 2010-08-19 16:44:05 -0500 (Thu, 19 Aug 2010) | 1 line

  allow star_expr in testlist_gexp
........
  r84274 | benjamin.peterson | 2010-08-22 18:40:46 -0500 (Sun, 22 Aug 2010) | 1 line

  wrap long line
........
  r84275 | benjamin.peterson | 2010-08-22 18:42:22 -0500 (Sun, 22 Aug 2010) | 1 line

  cleanup
........
  r84276 | benjamin.peterson | 2010-08-22 18:51:01 -0500 (Sun, 22 Aug 2010) | 1 line

  when there's a None value and a traceback, don't call type with it #9661
........
  r84375 | george.boutsioukis | 2010-08-31 08:38:53 -0500 (Tue, 31 Aug 2010) | 3 lines

  Idiomatic code changes & stylistic issues fixed in the BottomMatcher module. Thanks to Benjamin Peterson for taking the time to review the code.
........
  r85388 | benjamin.peterson | 2010-10-12 17:27:44 -0500 (Tue, 12 Oct 2010) | 1 line

  fix urllib fixer with multiple as imports on a line #10069
........
  r85478 | benjamin.peterson | 2010-10-14 08:09:56 -0500 (Thu, 14 Oct 2010) | 1 line

  stop abusing docstrings
........
  r85506 | benjamin.peterson | 2010-10-14 17:45:19 -0500 (Thu, 14 Oct 2010) | 1 line

  kill sibling import
........
  r85507 | benjamin.peterson | 2010-10-14 17:54:15 -0500 (Thu, 14 Oct 2010) | 1 line

  remove trailing whitespace
........
  r85508 | benjamin.peterson | 2010-10-14 17:55:28 -0500 (Thu, 14 Oct 2010) | 1 line

  typo
........
diff --git a/Lib/lib2to3/btm_utils.py b/Lib/lib2to3/btm_utils.py
new file mode 100644
index 0000000..2276dc9
--- /dev/null
+++ b/Lib/lib2to3/btm_utils.py
@@ -0,0 +1,283 @@
+"Utility functions used by the btm_matcher module"
+
+from . import pytree
+from .pgen2 import grammar, token
+from .pygram import pattern_symbols, python_symbols
+
+syms = pattern_symbols
+pysyms = python_symbols
+tokens = grammar.opmap
+token_labels = token
+
+TYPE_ANY = -1
+TYPE_ALTERNATIVES = -2
+TYPE_GROUP = -3
+
+class MinNode(object):
+    """This class serves as an intermediate representation of the
+    pattern tree during the conversion to sets of leaf-to-root
+    subpatterns"""
+
+    def __init__(self, type=None, name=None):
+        self.type = type
+        self.name = name
+        self.children = []
+        self.leaf = False
+        self.parent = None
+        self.alternatives = []
+        self.group = []
+
+    def __repr__(self):
+        return str(self.type) + ' ' + str(self.name)
+
+    def leaf_to_root(self):
+        """Internal method. Returns a characteristic path of the
+        pattern tree. This method must be run for all leaves until the
+        linear subpatterns are merged into a single"""
+        node = self
+        subp = []
+        while node:
+            if node.type == TYPE_ALTERNATIVES:
+                node.alternatives.append(subp)
+                if len(node.alternatives) == len(node.children):
+                    #last alternative
+                    subp = [tuple(node.alternatives)]
+                    node.alternatives = []
+                    node = node.parent
+                    continue
+                else:
+                    node = node.parent
+                    subp = None
+                    break
+
+            if node.type == TYPE_GROUP:
+                node.group.append(subp)
+                #probably should check the number of leaves
+                if len(node.group) == len(node.children):
+                    subp = get_characteristic_subpattern(node.group)
+                    node.group = []
+                    node = node.parent
+                    continue
+                else:
+                    node = node.parent
+                    subp = None
+                    break
+
+            if node.type == token_labels.NAME and node.name:
+                #in case of type=name, use the name instead
+                subp.append(node.name)
+            else:
+                subp.append(node.type)
+
+            node = node.parent
+        return subp
+
+    def get_linear_subpattern(self):
+        """Drives the leaf_to_root method. The reason that
+        leaf_to_root must be run multiple times is because we need to
+        reject 'group' matches; for example the alternative form
+        (a | b c) creates a group [b c] that needs to be matched. Since
+        matching multiple linear patterns overcomes the automaton's
+        capabilities, leaf_to_root merges each group into a single
+        choice based on 'characteristic'ity,
+
+        i.e. (a|b c) -> (a|b) if b more characteristic than c
+
+        Returns: The most 'characteristic'(as defined by
+          get_characteristic_subpattern) path for the compiled pattern
+          tree.
+        """
+
+        for l in self.leaves():
+            subp = l.leaf_to_root()
+            if subp:
+                return subp
+
+    def leaves(self):
+        "Generator that returns the leaves of the tree"
+        for child in self.children:
+            for x in child.leaves():
+                yield x
+        if not self.children:
+            yield self
+
+def reduce_tree(node, parent=None):
+    """
+    Internal function. Reduces a compiled pattern tree to an
+    intermediate representation suitable for feeding the
+    automaton. This also trims off any optional pattern elements(like
+    [a], a*).
+    """
+
+    new_node = None
+    #switch on the node type
+    if node.type == syms.Matcher:
+        #skip
+        node = node.children[0]
+
+    if node.type == syms.Alternatives  :
+        #2 cases
+        if len(node.children) <= 2:
+            #just a single 'Alternative', skip this node
+            new_node = reduce_tree(node.children[0], parent)
+        else:
+            #real alternatives
+            new_node = MinNode(type=TYPE_ALTERNATIVES)
+            #skip odd children('|' tokens)
+            for child in node.children:
+                if node.children.index(child)%2:
+                    continue
+                reduced = reduce_tree(child, new_node)
+                if reduced is not None:
+                    new_node.children.append(reduced)
+    elif node.type == syms.Alternative:
+        if len(node.children) > 1:
+
+            new_node = MinNode(type=TYPE_GROUP)
+            for child in node.children:
+                reduced = reduce_tree(child, new_node)
+                if reduced:
+                    new_node.children.append(reduced)
+            if not new_node.children:
+                # delete the group if all of the children were reduced to None
+                new_node = None
+
+        else:
+            new_node = reduce_tree(node.children[0], parent)
+
+    elif node.type == syms.Unit:
+        if (isinstance(node.children[0], pytree.Leaf) and
+            node.children[0].value == '('):
+            #skip parentheses
+            return reduce_tree(node.children[1], parent)
+        if ((isinstance(node.children[0], pytree.Leaf) and
+               node.children[0].value == '[')
+               or
+               (len(node.children)>1 and
+               hasattr(node.children[1], "value") and
+               node.children[1].value == '[')):
+            #skip whole unit if its optional
+            return None
+
+        leaf = True
+        details_node = None
+        alternatives_node = None
+        has_repeater = False
+        repeater_node = None
+        has_variable_name = False
+
+        for child in node.children:
+            if child.type == syms.Details:
+                leaf = False
+                details_node = child
+            elif child.type == syms.Repeater:
+                has_repeater = True
+                repeater_node = child
+            elif child.type == syms.Alternatives:
+                alternatives_node = child
+            if hasattr(child, 'value') and child.value == '=': # variable name
+                has_variable_name = True
+
+        #skip variable name
+        if has_variable_name:
+            #skip variable name, '='
+            name_leaf = node.children[2]
+            if hasattr(name_leaf, 'value') and name_leaf.value == '(':
+                # skip parenthesis
+                name_leaf = node.children[3]
+        else:
+            name_leaf = node.children[0]
+
+        #set node type
+        if name_leaf.type == token_labels.NAME:
+            #(python) non-name or wildcard
+            if name_leaf.value == 'any':
+                new_node = MinNode(type=TYPE_ANY)
+            else:
+                if hasattr(token_labels, name_leaf.value):
+                    new_node = MinNode(type=getattr(token_labels, name_leaf.value))
+                else:
+                    new_node = MinNode(type=getattr(pysyms, name_leaf.value))
+
+        elif name_leaf.type == token_labels.STRING:
+            #(python) name or character; remove the apostrophes from
+            #the string value
+            name = name_leaf.value.strip("'")
+            if name in tokens:
+                new_node = MinNode(type=tokens[name])
+            else:
+                new_node = MinNode(type=token_labels.NAME, name=name)
+        elif name_leaf.type == syms.Alternatives:
+            new_node = reduce_tree(alternatives_node, parent)
+
+        #handle repeaters
+        if has_repeater:
+            if repeater_node.children[0].value == '*':
+                #reduce to None
+                new_node = None
+            elif repeater_node.children[0].value == '+':
+                #reduce to a single occurence i.e. do nothing
+                pass
+            else:
+                #TODO: handle {min, max} repeaters
+                raise NotImplementedError
+                pass
+
+        #add children
+        if details_node and new_node is not None:
+            for child in details_node.children[1:-1]:
+                #skip '<', '>' markers
+                reduced = reduce_tree(child, new_node)
+                if reduced is not None:
+                    new_node.children.append(reduced)
+    if new_node:
+        new_node.parent = parent
+    return new_node
+
+
+def get_characteristic_subpattern(subpatterns):
+    """Picks the most characteristic from a list of linear patterns
+    Current order used is:
+    names > common_names > common_chars
+    """
+    if not isinstance(subpatterns, list):
+        return subpatterns
+    if len(subpatterns)==1:
+        return subpatterns[0]
+
+    # first pick out the ones containing variable names
+    subpatterns_with_names = []
+    subpatterns_with_common_names = []
+    common_names = ['in', 'for', 'if' , 'not', 'None']
+    subpatterns_with_common_chars = []
+    common_chars = "[]().,:"
+    for subpattern in subpatterns:
+        if any(rec_test(subpattern, lambda x: type(x) is str)):
+            if any(rec_test(subpattern,
+                            lambda x: isinstance(x, str) and x in common_chars)):
+                subpatterns_with_common_chars.append(subpattern)
+            elif any(rec_test(subpattern,
+                              lambda x: isinstance(x, str) and x in common_names)):
+                subpatterns_with_common_names.append(subpattern)
+
+            else:
+                subpatterns_with_names.append(subpattern)
+
+    if subpatterns_with_names:
+        subpatterns = subpatterns_with_names
+    elif subpatterns_with_common_names:
+        subpatterns = subpatterns_with_common_names
+    elif subpatterns_with_common_chars:
+        subpatterns = subpatterns_with_common_chars
+    # of the remaining subpatterns pick out the longest one
+    return max(subpatterns, key=len)
+
+def rec_test(sequence, test_func):
+    """Tests test_func on all items of sequence and items of included
+    sub-iterables"""
+    for x in sequence:
+        if isinstance(x, (list, tuple)):
+            for y in rec_test(x, test_func):
+                yield y
+        else:
+            yield test_func(x)