Merged revisions 61596-61597 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r61596 | martin.v.loewis | 2008-03-18 23:43:46 -0500 (Di, 18 Mär 2008) | 2 lines

  Import lib2to3.
........
  r61597 | martin.v.loewis | 2008-03-18 23:58:04 -0500 (Di, 18 Mär 2008) | 3 lines

  Initialized merge tracking via "svnmerge" with revisions "1-61595" from
  svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3
........
diff --git a/Lib/lib2to3/patcomp.py b/Lib/lib2to3/patcomp.py
new file mode 100644
index 0000000..353b960
--- /dev/null
+++ b/Lib/lib2to3/patcomp.py
@@ -0,0 +1,186 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Pattern compiler.
+
+The grammer is taken from PatternGrammar.txt.
+
+The compiler compiles a pattern to a pytree.*Pattern instance.
+"""
+
+__author__ = "Guido van Rossum <guido@python.org>"
+
+# Python imports
+import os
+
+# Fairly local imports
+from .pgen2 import driver
+from .pgen2 import literals
+from .pgen2 import token
+from .pgen2 import tokenize
+
+# Really local imports
+from . import pytree
+from . import pygram
+
+# The pattern grammar file
+_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
+                                     "PatternGrammar.txt")
+
+
+def tokenize_wrapper(input):
+    """Tokenizes a string suppressing significant whitespace."""
+    skip = (token.NEWLINE, token.INDENT, token.DEDENT)
+    tokens = tokenize.generate_tokens(driver.generate_lines(input).next)
+    for quintuple in tokens:
+        type, value, start, end, line_text = quintuple
+        if type not in skip:
+            yield quintuple
+
+
+class PatternCompiler(object):
+
+    def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
+        """Initializer.
+
+        Takes an optional alternative filename for the pattern grammar.
+        """
+        self.grammar = driver.load_grammar(grammar_file)
+        self.syms = pygram.Symbols(self.grammar)
+        self.pygrammar = pygram.python_grammar
+        self.pysyms = pygram.python_symbols
+        self.driver = driver.Driver(self.grammar, convert=pattern_convert)
+
+    def compile_pattern(self, input, debug=False):
+        """Compiles a pattern string to a nested pytree.*Pattern object."""
+        tokens = tokenize_wrapper(input)
+        root = self.driver.parse_tokens(tokens, debug=debug)
+        return self.compile_node(root)
+
+    def compile_node(self, node):
+        """Compiles a node, recursively.
+
+        This is one big switch on the node type.
+        """
+        # XXX Optimize certain Wildcard-containing-Wildcard patterns
+        # that can be merged
+        if node.type == self.syms.Matcher:
+            node = node.children[0] # Avoid unneeded recursion
+
+        if node.type == self.syms.Alternatives:
+            # Skip the odd children since they are just '|' tokens
+            alts = [self.compile_node(ch) for ch in node.children[::2]]
+            if len(alts) == 1:
+                return alts[0]
+            p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
+            return p.optimize()
+
+        if node.type == self.syms.Alternative:
+            units = [self.compile_node(ch) for ch in node.children]
+            if len(units) == 1:
+                return units[0]
+            p = pytree.WildcardPattern([units], min=1, max=1)
+            return p.optimize()
+
+        if node.type == self.syms.NegatedUnit:
+            pattern = self.compile_basic(node.children[1:])
+            p = pytree.NegatedPattern(pattern)
+            return p.optimize()
+
+        assert node.type == self.syms.Unit
+
+        name = None
+        nodes = node.children
+        if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
+            name = nodes[0].value
+            nodes = nodes[2:]
+        repeat = None
+        if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
+            repeat = nodes[-1]
+            nodes = nodes[:-1]
+
+        # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
+        pattern = self.compile_basic(nodes, repeat)
+
+        if repeat is not None:
+            assert repeat.type == self.syms.Repeater
+            children = repeat.children
+            child = children[0]
+            if child.type == token.STAR:
+                min = 0
+                max = pytree.HUGE
+            elif child.type == token.PLUS:
+                min = 1
+                max = pytree.HUGE
+            elif child.type == token.LBRACE:
+                assert children[-1].type == token.RBRACE
+                assert  len(children) in (3, 5)
+                min = max = self.get_int(children[1])
+                if len(children) == 5:
+                    max = self.get_int(children[3])
+            else:
+                assert False
+            if min != 1 or max != 1:
+                pattern = pattern.optimize()
+                pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
+
+        if name is not None:
+            pattern.name = name
+        return pattern.optimize()
+
+    def compile_basic(self, nodes, repeat=None):
+        # Compile STRING | NAME [Details] | (...) | [...]
+        assert len(nodes) >= 1
+        node = nodes[0]
+        if node.type == token.STRING:
+            value = literals.evalString(node.value)
+            return pytree.LeafPattern(content=value)
+        elif node.type == token.NAME:
+            value = node.value
+            if value.isupper():
+                if value not in TOKEN_MAP:
+                    raise SyntaxError("Invalid token: %r" % value)
+                return pytree.LeafPattern(TOKEN_MAP[value])
+            else:
+                if value == "any":
+                    type = None
+                elif not value.startswith("_"):
+                    type = getattr(self.pysyms, value, None)
+                    if type is None:
+                        raise SyntaxError("Invalid symbol: %r" % value)
+                if nodes[1:]: # Details present
+                    content = [self.compile_node(nodes[1].children[1])]
+                else:
+                    content = None
+                return pytree.NodePattern(type, content)
+        elif node.value == "(":
+            return self.compile_node(nodes[1])
+        elif node.value == "[":
+            assert repeat is None
+            subpattern = self.compile_node(nodes[1])
+            return pytree.WildcardPattern([[subpattern]], min=0, max=1)
+        assert False, node
+
+    def get_int(self, node):
+        assert node.type == token.NUMBER
+        return int(node.value)
+
+
+# Map named tokens to the type value for a LeafPattern
+TOKEN_MAP = {"NAME": token.NAME,
+             "STRING": token.STRING,
+             "NUMBER": token.NUMBER,
+             "TOKEN": None}
+
+
+def pattern_convert(grammar, raw_node_info):
+    """Converts raw node information to a Node or Leaf instance."""
+    type, value, context, children = raw_node_info
+    if children or type in grammar.number2symbol:
+        return pytree.Node(type, children, context=context)
+    else:
+        return pytree.Leaf(type, value, context=context)
+
+
+def compile_pattern(pattern):
+    return PatternCompiler().compile_pattern(pattern)