blob: cbc58e759d5374e3980f1402ab708d0888cda59c [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4# Modifications:
5# Copyright 2006 Google, Inc. All Rights Reserved.
6# Licensed to PSF under a Contributor Agreement.
7
8"""Parser driver.
9
10This provides a high-level interface to parse a file into a syntax tree.
11
12"""
13
14__author__ = "Guido van Rossum <guido@python.org>"
15
16__all__ = ["Driver", "load_grammar"]
17
18# Python imports
Benjamin Petersond481e3d2009-05-09 19:42:23 +000019import codecs
Benjamin Peterson2e7965e2011-02-26 22:12:10 +000020import io
Martin v. Löwisef04c442008-03-19 05:04:44 +000021import os
22import logging
Benjamin Peterson8a587712017-12-22 12:18:33 -080023import pkgutil
Martin v. Löwisef04c442008-03-19 05:04:44 +000024import sys
25
26# Pgen imports
Martin v. Löwis3faa84f2008-03-22 00:07:09 +000027from . import grammar, parse, token, tokenize, pgen
Martin v. Löwisef04c442008-03-19 05:04:44 +000028
29
class Driver(object):
    """High-level front end that turns token streams into syntax trees.

    A Driver bundles a grammar, an optional ``convert`` callable (handed
    unchanged to ``parse.Parser``) and a logger used for debug output.
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Store the grammar, converter and logger.

        When *logger* is None the root logger (``logging.getLogger()``)
        is used.
        """
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        *tokens* is an iterable of 5-tuples
        ``(type, value, start, end, line_text)`` where *start* and *end*
        are ``(lineno, column)`` pairs.  Text that lies between
        significant tokens (gaps, COMMENT and NL tokens) is accumulated
        into a "prefix" string that is passed to the parser together
        with the token's start position.

        With *debug* true, every token fed to the parser is logged at
        DEBUG level.

        Raises ``parse.ParseError("incomplete input", ...)`` when the
        token stream ends before the parser reports completion.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        # (lineno, column) is where the previous token ended, i.e. where
        # the next token is expected to start if nothing was skipped.
        lineno = 1
        column = 0
        tok_type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            tok_type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                # There is a gap between the previous token and this one;
                # reconstruct the skipped text and add it to the prefix.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if tok_type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines are not fed to the
                # parser; they become part of the next token's prefix.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if tok_type == token.OP:
                # Map the generic OP token to the specific operator type.
                # Note: ``grammar`` here is the module imported at file
                # level, not ``self.grammar``.
                tok_type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[tok_type], value, prefix)
            if p.addtoken(tok_type, value, (prefix, start)):
                # A true result from addtoken() ends the parse.
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   tok_type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        *stream* only needs a ``readline`` method; it is handed to
        ``tokenize.generate_tokens``.
        """
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        Currently a thin wrapper around parse_stream_raw().
        """
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree.

        The file is opened in text mode with the given *encoding* and is
        closed again before this method returns.
        """
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)
105
106
def _generate_pickle_name(gt):
    """Return the pickle file name that corresponds to grammar file *gt*.

    A ``.txt`` extension is dropped (any other extension is kept), then
    the dot-joined fields of ``sys.version_info`` and ``.pickle`` are
    appended, so the name is specific to the running Python version.
    """
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
112
113
def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle).

    *gt* is the path of the grammar text file and *gp* the path of its
    pickle; when *gp* is None it is derived from *gt* with
    _generate_pickle_name().

    The tables are regenerated from *gt* when *force* is true or when
    the pickle is missing or older than *gt*; otherwise the pickle is
    loaded.  After regenerating, the tables are written to *gp* if
    *save* is true; an OSError during that write is logged and
    otherwise ignored, so the freshly generated grammar is still
    returned.

    Progress messages go to *logger* (the root logger by default) at
    INFO level.
    """
    if logger is None:
        logger = logging.getLogger()
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except OSError as e:
                # Best effort: an unwritable cache must not prevent use
                # of the grammar that was just generated.
                logger.info("Writing failed: %s", e)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g
133
134
def _newer(a, b):
    """Inquire whether file a was written since file b.

    Returns False if *a* does not exist, True if *a* exists but *b*
    does not, and otherwise whether the modification time of *a* is
    greater than or equal to that of *b* (equal timestamps count as
    "newer").
    """
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)
Ned Deily24ec0542012-09-08 19:04:47 -0700142
143
def load_packaged_grammar(package, grammar_source):
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.

    """
    if os.path.isfile(grammar_source):
        return load_grammar(grammar_source)
    # Only the base name is used: the pickle is looked up as a resource
    # inside *package*, not at the directory of grammar_source.
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source))
    # NOTE(review): pkgutil.get_data() is documented to return None when
    # the package cannot be located or its loader lacks get_data(); that
    # case is not handled here and the value goes straight to g.loads().
    data = pkgutil.get_data(package, pickled_name)
    g = grammar.Grammar()
    g.loads(data)
    return g
162
163
def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    When called without arguments, ``sys.argv[1:]`` is used instead.

    Logging is configured to print bare INFO messages to stdout.  Every
    grammar is regenerated and saved unconditionally (``force=True``,
    ``save=True``).  Always returns True.
    """
    if not args:
        args = sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True
176
if __name__ == "__main__":
    # main() returns True on completion, which maps to exit status 0.
    sys.exit(int(not main()))