Merged revisions 61596-61597 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r61596 | martin.v.loewis | 2008-03-18 23:43:46 -0500 (Di, 18 Mär 2008) | 2 lines

  Import lib2to3.
........
  r61597 | martin.v.loewis | 2008-03-18 23:58:04 -0500 (Di, 18 Mär 2008) | 3 lines

  Initialized merge tracking via "svnmerge" with revisions "1-61595" from
  svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3
........
diff --git a/Lib/lib2to3/pgen2/conv.py b/Lib/lib2to3/pgen2/conv.py
new file mode 100644
index 0000000..5d788a1
--- /dev/null
+++ b/Lib/lib2to3/pgen2/conv.py
@@ -0,0 +1,257 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Convert graminit.[ch] spit out by pgen to Python code.
+
+Pgen is the Python parser generator.  It is useful to quickly create a
+parser from a grammar file in Python's grammar notation.  But I don't
+want my parsers to be written in C (yet), so I'm translating the
+parsing tables to Python data structures and writing a Python parse
+engine.
+
+Note that the token numbers are constants determined by the standard
+Python tokenizer.  The standard token module defines these numbers and
+their names (the names are not used much).  The token numbers are
+hardcoded into the Python tokenizer and into pgen.  A Python
+implementation of the Python tokenizer is also available, in the
+standard tokenize module.
+
+On the other hand, symbol numbers (representing the grammar's
+non-terminals) are assigned by pgen based on the actual grammar
+input.
+
+Note: this module is pretty much obsolete; the pgen module generates
+equivalent grammar tables directly from the Grammar.txt input file
+without having to invoke the Python pgen C program.
+
+"""
+
+# Python imports
+import re
+
+# Local imports
+from pgen2 import grammar, token
+
+
+class Converter(grammar.Grammar):
+    """Grammar subclass that reads classic pgen output files.
+
+    The run() method reads the tables as produced by the pgen parser
+    generator, typically contained in two C files, graminit.h and
+    graminit.c.  The other methods are for internal use only.
+
+    See the base class for more documentation.
+
+    """
+
+    def run(self, graminit_h, graminit_c):
+        """Load the grammar tables from the text files written by pgen."""
+        self.parse_graminit_h(graminit_h)
+        self.parse_graminit_c(graminit_c)
+        self.finish_off()
+
+    def parse_graminit_h(self, filename):
+        """Parse the .h file writen by pgen.  (Internal)
+
+        This file is a sequence of #define statements defining the
+        nonterminals of the grammar as numbers.  We build two tables
+        mapping the numbers to names and back.
+
+        """
+        try:
+            f = open(filename)
+        except IOError, err:
+            print "Can't open %s: %s" % (filename, err)
+            return False
+        self.symbol2number = {}
+        self.number2symbol = {}
+        lineno = 0
+        for line in f:
+            lineno += 1
+            mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
+            if not mo and line.strip():
+                print "%s(%s): can't parse %s" % (filename, lineno,
+                                                  line.strip())
+            else:
+                symbol, number = mo.groups()
+                number = int(number)
+                assert symbol not in self.symbol2number
+                assert number not in self.number2symbol
+                self.symbol2number[symbol] = number
+                self.number2symbol[number] = symbol
+        return True
+
+    def parse_graminit_c(self, filename):
+        """Parse the .c file writen by pgen.  (Internal)
+
+        The file looks as follows.  The first two lines are always this:
+
+        #include "pgenheaders.h"
+        #include "grammar.h"
+
+        After that come four blocks:
+
+        1) one or more state definitions
+        2) a table defining dfas
+        3) a table defining labels
+        4) a struct defining the grammar
+
+        A state definition has the following form:
+        - one or more arc arrays, each of the form:
+          static arc arcs_<n>_<m>[<k>] = {
+                  {<i>, <j>},
+                  ...
+          };
+        - followed by a state array, of the form:
+          static state states_<s>[<t>] = {
+                  {<k>, arcs_<n>_<m>},
+                  ...
+          };
+
+        """
+        try:
+            f = open(filename)
+        except IOError, err:
+            print "Can't open %s: %s" % (filename, err)
+            return False
+        # The code below essentially uses f's iterator-ness!
+        lineno = 0
+
+        # Expect the two #include lines
+        lineno, line = lineno+1, f.next()
+        assert line == '#include "pgenheaders.h"\n', (lineno, line)
+        lineno, line = lineno+1, f.next()
+        assert line == '#include "grammar.h"\n', (lineno, line)
+
+        # Parse the state definitions
+        lineno, line = lineno+1, f.next()
+        allarcs = {}
+        states = []
+        while line.startswith("static arc "):
+            while line.startswith("static arc "):
+                mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
+                              line)
+                assert mo, (lineno, line)
+                n, m, k = map(int, mo.groups())
+                arcs = []
+                for _ in range(k):
+                    lineno, line = lineno+1, f.next()
+                    mo = re.match(r"\s+{(\d+), (\d+)},$", line)
+                    assert mo, (lineno, line)
+                    i, j = map(int, mo.groups())
+                    arcs.append((i, j))
+                lineno, line = lineno+1, f.next()
+                assert line == "};\n", (lineno, line)
+                allarcs[(n, m)] = arcs
+                lineno, line = lineno+1, f.next()
+            mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
+            assert mo, (lineno, line)
+            s, t = map(int, mo.groups())
+            assert s == len(states), (lineno, line)
+            state = []
+            for _ in range(t):
+                lineno, line = lineno+1, f.next()
+                mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
+                assert mo, (lineno, line)
+                k, n, m = map(int, mo.groups())
+                arcs = allarcs[n, m]
+                assert k == len(arcs), (lineno, line)
+                state.append(arcs)
+            states.append(state)
+            lineno, line = lineno+1, f.next()
+            assert line == "};\n", (lineno, line)
+            lineno, line = lineno+1, f.next()
+        self.states = states
+
+        # Parse the dfas
+        dfas = {}
+        mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
+        assert mo, (lineno, line)
+        ndfas = int(mo.group(1))
+        for i in range(ndfas):
+            lineno, line = lineno+1, f.next()
+            mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
+                          line)
+            assert mo, (lineno, line)
+            symbol = mo.group(2)
+            number, x, y, z = map(int, mo.group(1, 3, 4, 5))
+            assert self.symbol2number[symbol] == number, (lineno, line)
+            assert self.number2symbol[number] == symbol, (lineno, line)
+            assert x == 0, (lineno, line)
+            state = states[z]
+            assert y == len(state), (lineno, line)
+            lineno, line = lineno+1, f.next()
+            mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
+            assert mo, (lineno, line)
+            first = {}
+            rawbitset = eval(mo.group(1))
+            for i, c in enumerate(rawbitset):
+                byte = ord(c)
+                for j in range(8):
+                    if byte & (1<<j):
+                        first[i*8 + j] = 1
+            dfas[number] = (state, first)
+        lineno, line = lineno+1, f.next()
+        assert line == "};\n", (lineno, line)
+        self.dfas = dfas
+
+        # Parse the labels
+        labels = []
+        lineno, line = lineno+1, f.next()
+        mo = re.match(r"static label labels\[(\d+)\] = {$", line)
+        assert mo, (lineno, line)
+        nlabels = int(mo.group(1))
+        for i in range(nlabels):
+            lineno, line = lineno+1, f.next()
+            mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
+            assert mo, (lineno, line)
+            x, y = mo.groups()
+            x = int(x)
+            if y == "0":
+                y = None
+            else:
+                y = eval(y)
+            labels.append((x, y))
+        lineno, line = lineno+1, f.next()
+        assert line == "};\n", (lineno, line)
+        self.labels = labels
+
+        # Parse the grammar struct
+        lineno, line = lineno+1, f.next()
+        assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
+        lineno, line = lineno+1, f.next()
+        mo = re.match(r"\s+(\d+),$", line)
+        assert mo, (lineno, line)
+        ndfas = int(mo.group(1))
+        assert ndfas == len(self.dfas)
+        lineno, line = lineno+1, f.next()
+        assert line == "\tdfas,\n", (lineno, line)
+        lineno, line = lineno+1, f.next()
+        mo = re.match(r"\s+{(\d+), labels},$", line)
+        assert mo, (lineno, line)
+        nlabels = int(mo.group(1))
+        assert nlabels == len(self.labels), (lineno, line)
+        lineno, line = lineno+1, f.next()
+        mo = re.match(r"\s+(\d+)$", line)
+        assert mo, (lineno, line)
+        start = int(mo.group(1))
+        assert start in self.number2symbol, (lineno, line)
+        self.start = start
+        lineno, line = lineno+1, f.next()
+        assert line == "};\n", (lineno, line)
+        try:
+            lineno, line = lineno+1, f.next()
+        except StopIteration:
+            pass
+        else:
+            assert 0, (lineno, line)
+
+    def finish_off(self):
+        """Create additional useful structures.  (Internal)."""
+        self.keywords = {} # map from keyword strings to arc labels
+        self.tokens = {}   # map from numeric token values to arc labels
+        for ilabel, (type, value) in enumerate(self.labels):
+            if type == token.NAME and value is not None:
+                self.keywords[value] = ilabel
+            elif value is None:
+                self.tokens[type] = ilabel