lib2to3.pgen2.driver.load_grammar() now creates a stable cache file
between runs given the same Grammar.txt input, regardless of the hash
randomization setting.
Backport of 186bb8dc5540 from 3.5. Done in 2.7 per the lib2to3 exemption.
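For illustration (the new test_load_grammar_from_subprocess below exercises
exactly this), the guarantee can be checked by regenerating the pickle under
two different hash seeds and comparing bytes. A minimal sketch; the scratch
paths are hypothetical and error handling is omitted:

    import os
    import subprocess
    import sys

    SNIPPET = ("from lib2to3.pgen2 import driver; "
               "driver.load_grammar(%r, save=True, force=True)")

    def rebuild(grammar_path, seed):
        # Regenerate the grammar pickle in a subprocess pinned to one seed.
        env = dict(os.environ)
        env['PYTHONHASHSEED'] = seed
        subprocess.check_call([sys.executable, '-c', SNIPPET % grammar_path],
                              env=env)

    rebuild('/tmp/a/Grammar.txt', '1')
    rebuild('/tmp/b/Grammar.txt', '2')
    version = '.'.join(map(str, sys.version_info))
    with open('/tmp/a/Grammar%s.pickle' % version, 'rb') as f1, \
         open('/tmp/b/Grammar%s.pickle' % version, 'rb') as f2:
        assert f1.read() == f2.read()  # identical despite different seeds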
diff --git a/Lib/lib2to3/pgen2/driver.py b/Lib/lib2to3/pgen2/driver.py
index 39dafb9..ce601bb 100644
--- a/Lib/lib2to3/pgen2/driver.py
+++ b/Lib/lib2to3/pgen2/driver.py
@@ -106,16 +106,19 @@
return self.parse_tokens(tokens, debug)
+def _generate_pickle_name(gt):
+ head, tail = os.path.splitext(gt)
+ if tail == ".txt":
+ tail = ""
+ return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+
+
def load_grammar(gt="Grammar.txt", gp=None,
save=True, force=False, logger=None):
"""Load the grammar (maybe from a pickle)."""
if logger is None:
logger = logging.getLogger()
- if gp is None:
- head, tail = os.path.splitext(gt)
- if tail == ".txt":
- tail = ""
- gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+ gp = _generate_pickle_name(gt) if gp is None else gp
if force or not _newer(gp, gt):
logger.info("Generating grammar tables from %s", gt)
g = pgen.generate_grammar(gt)
@@ -123,8 +126,8 @@
logger.info("Writing grammar tables to %s", gp)
try:
g.dump(gp)
- except IOError, e:
- logger.info("Writing failed:"+str(e))
+ except IOError as e:
+ logger.info("Writing failed: %s", e)
else:
g = grammar.Grammar()
g.load(gp)
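The cache file name now comes from a single helper, so tests and other
callers can compute it too. The helper drops a ".txt" suffix, keeps any
other extension, and appends the full interpreter version. A quick sketch
of its output (the exact names below assume a hypothetical 2.7.13 final
build):

    from lib2to3.pgen2 import driver

    print(driver._generate_pickle_name("Grammar.txt"))
    # -> "Grammar2.7.13.final.0.pickle" (".txt" dropped, version appended)
    print(driver._generate_pickle_name("PatternGrammar.txt"))
    # -> "PatternGrammar2.7.13.final.0.pickle"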
diff --git a/Lib/lib2to3/pgen2/grammar.py b/Lib/lib2to3/pgen2/grammar.py
index 8220b0a..75255e9 100644
--- a/Lib/lib2to3/pgen2/grammar.py
+++ b/Lib/lib2to3/pgen2/grammar.py
@@ -13,6 +13,7 @@
"""
# Python imports
+import collections
import pickle
# Local imports
@@ -85,10 +86,21 @@
self.start = 256
def dump(self, filename):
- """Dump the grammar tables to a pickle file."""
- f = open(filename, "wb")
- pickle.dump(self.__dict__, f, 2)
- f.close()
+ """Dump the grammar tables to a pickle file.
+
+ dump() recursively changes all dict to OrderedDict, so the pickled file
+ is not exactly the same as what was passed in to dump(). load() uses the
+ pickled file to create the tables, but only changes OrderedDict to dict
+ at the top level; it does not recursively change OrderedDict to dict.
+ So, the loaded tables are different from the original tables that were
+ passed to dump() in that some of the OrderedDict (from the pickled file)
+ are not changed back to dict. For parsing, this has no effect on
+ performance because OrderedDict uses dict's __getitem__ with nothing in
+ between.
+ """
+ with open(filename, "wb") as f:
+ d = _make_deterministic(self.__dict__)
+ pickle.dump(d, f, 2)
def load(self, filename):
"""Load the grammar tables from a pickle file."""
@@ -126,6 +138,17 @@
print "start", self.start
+def _make_deterministic(top):
+ if isinstance(top, dict):
+ return collections.OrderedDict(
+ sorted(((k, _make_deterministic(v)) for k, v in top.iteritems())))
+ if isinstance(top, list):
+ return [_make_deterministic(e) for e in top]
+ if isinstance(top, tuple):
+ return tuple(_make_deterministic(e) for e in top)
+ return top
+
+
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """
diff --git a/Lib/lib2to3/pgen2/pgen.py b/Lib/lib2to3/pgen2/pgen.py
index 63084a4..ed16992 100644
--- a/Lib/lib2to3/pgen2/pgen.py
+++ b/Lib/lib2to3/pgen2/pgen.py
@@ -39,7 +39,7 @@
states = []
for state in dfa:
arcs = []
- for label, next in state.arcs.iteritems():
+ for label, next in sorted(state.arcs.iteritems()):
arcs.append((self.make_label(c, label), dfa.index(next)))
if state.isfinal:
arcs.append((0, dfa.index(state)))
@@ -52,7 +52,7 @@
def make_first(self, c, name):
rawfirst = self.first[name]
first = {}
- for label in rawfirst:
+ for label in sorted(rawfirst):
ilabel = self.make_label(c, label)
##assert ilabel not in first # XXX failed on <> ... !=
first[ilabel] = 1
@@ -192,7 +192,7 @@
for label, next in nfastate.arcs:
if label is not None:
addclosure(next, arcs.setdefault(label, {}))
- for label, nfaset in arcs.iteritems():
+ for label, nfaset in sorted(arcs.iteritems()):
for st in states:
if st.nfaset == nfaset:
break
@@ -222,7 +222,7 @@
print "Dump of DFA for", name
for i, state in enumerate(dfa):
print " State", i, state.isfinal and "(final)" or ""
- for label, next in state.arcs.iteritems():
+ for label, next in sorted(state.arcs.iteritems()):
print " %s -> %d" % (label, dfa.index(next))
def simplify_dfa(self, dfa):
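All four pgen.py hunks apply the same fix: every dict iteration that feeds
the numbered label/state tables is wrapped in sorted(), because
hash-randomized iteration order otherwise leaks into the emitted tables. A
toy Python 2 demonstration of the underlying sensitivity:

    # String hashing decides the order of iteritems() under hash
    # randomization; sorted() restores one canonical order every run.
    arcs = {'NAME': 1, '(': 2, '[': 3, 'NUMBER': 4}
    for label, target in sorted(arcs.iteritems()):
        print label, target   # always '(', 'NAME', 'NUMBER', '['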
diff --git a/Lib/lib2to3/tests/support.py b/Lib/lib2to3/tests/support.py
index 3646935..8f12de9 100644
--- a/Lib/lib2to3/tests/support.py
+++ b/Lib/lib2to3/tests/support.py
@@ -11,13 +11,13 @@
# Local imports
from lib2to3 import pytree, refactor
-from lib2to3.pgen2 import driver
+from lib2to3.pgen2 import driver as pgen2_driver
test_dir = os.path.dirname(__file__)
proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
grammar_path = os.path.join(test_dir, "..", "Grammar.txt")
-grammar = driver.load_grammar(grammar_path)
-driver = driver.Driver(grammar, convert=pytree.convert)
+grammar = pgen2_driver.load_grammar(grammar_path)
+driver = pgen2_driver.Driver(grammar, convert=pytree.convert)
def parse_string(string):
return driver.parse_string(reformat(string), debug=True)
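The support.py change is only a rename to avoid shadowing: the old code
bound the name driver to the module and then immediately rebound it to the
Driver instance, so the module (and helpers such as _generate_pickle_name
that the new tests need) became unreachable. A sketch of the old failure
mode:

    from lib2to3.pgen2 import driver

    grammar = driver.load_grammar("Grammar.txt")  # 'driver' is the module
    driver = driver.Driver(grammar)               # now it is the instance
    driver.load_grammar                           # AttributeError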
diff --git a/Lib/lib2to3/tests/test_parser.py b/Lib/lib2to3/tests/test_parser.py
index 9c5463a..cf484a1 100644
--- a/Lib/lib2to3/tests/test_parser.py
+++ b/Lib/lib2to3/tests/test_parser.py
@@ -6,17 +6,20 @@
test_grammar.py files from both Python 2 and Python 3.
"""
-from __future__ import with_statement
-
# Testing imports
from . import support
from .support import driver, test_dir
# Python imports
import os
+import shutil
+import subprocess
import sys
+import tempfile
+import unittest
# Local imports
+from lib2to3.pgen2 import driver as pgen2_driver
from lib2to3.pgen2 import tokenize
from ..pgen2.parse import ParseError
from lib2to3.pygram import python_symbols as syms
@@ -31,6 +34,71 @@
self.assertEqual(t.children[1].children[0].type, syms.print_stmt)
+class TestPgen2Caching(support.TestCase):
+ def test_load_grammar_from_txt_file(self):
+ pgen2_driver.load_grammar(support.grammar_path, save=False, force=True)
+
+ def test_load_grammar_from_pickle(self):
+ # Make a copy of the grammar file in a temp directory we are
+ # guaranteed to be able to write to.
+ tmpdir = tempfile.mkdtemp()
+ try:
+ grammar_copy = os.path.join(
+ tmpdir, os.path.basename(support.grammar_path))
+ shutil.copy(support.grammar_path, grammar_copy)
+ pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
+
+ pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
+ self.assertTrue(os.path.exists(pickle_name))
+
+ os.unlink(grammar_copy) # Only the pickle remains...
+ pgen2_driver.load_grammar(grammar_copy, save=False, force=False)
+ finally:
+ shutil.rmtree(tmpdir)
+
+ @unittest.skipIf(sys.executable is None, 'sys.executable required')
+ def test_load_grammar_from_subprocess(self):
+ tmpdir = tempfile.mkdtemp()
+ tmpsubdir = os.path.join(tmpdir, 'subdir')
+ try:
+ os.mkdir(tmpsubdir)
+ grammar_base = os.path.basename(support.grammar_path)
+ grammar_copy = os.path.join(tmpdir, grammar_base)
+ grammar_sub_copy = os.path.join(tmpsubdir, grammar_base)
+ shutil.copy(support.grammar_path, grammar_copy)
+ shutil.copy(support.grammar_path, grammar_sub_copy)
+ pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
+ pickle_sub_name = pgen2_driver._generate_pickle_name(
+ grammar_sub_copy)
+ self.assertNotEqual(pickle_name, pickle_sub_name)
+
+ # Generate a pickle file from this process.
+ pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
+ self.assertTrue(os.path.exists(pickle_name))
+
+ # Generate a new pickle file in a subprocess with a most likely
+ # different hash randomization seed.
+ sub_env = dict(os.environ)
+ sub_env['PYTHONHASHSEED'] = 'random'
+ subprocess.check_call(
+ [sys.executable, '-c', """
+from lib2to3.pgen2 import driver as pgen2_driver
+pgen2_driver.load_grammar(%r, save=True, force=True)
+ """ % (grammar_sub_copy,)],
+ env=sub_env)
+ self.assertTrue(os.path.exists(pickle_sub_name))
+
+ with open(pickle_name, 'rb') as pickle_f_1, \
+ open(pickle_sub_name, 'rb') as pickle_f_2:
+ self.assertEqual(
+ pickle_f_1.read(), pickle_f_2.read(),
+ msg='Grammar caches generated using different hash seeds'
+ ' were not identical.')
+ finally:
+ shutil.rmtree(tmpdir)
+
+
+
class GrammarTest(support.TestCase):
def validate(self, code):
support.parse_string(code)