# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""
13
14__author__ = "Guido van Rossum <guido@python.org>"
15
16__all__ = ["Driver", "load_grammar"]
17
18# Python imports
Benjamin Peterson84ad84e2009-05-09 01:01:14 +000019import codecs
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000020import os
21import logging
Benjamin Peterson770a8022017-12-22 12:52:10 -080022import pkgutil
Benjamin Peterson8059e1e2011-02-26 22:11:02 +000023import StringIO
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000024import sys
25
26# Pgen imports
Martin v. Löwisbaf267c2008-03-22 00:01:12 +000027from . import grammar, parse, token, tokenize, pgen
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000028
29
class Driver(object):
    """High-level front end that feeds tokens to the pgen parser.

    The grammar and the optional *convert* argument are handed to
    ``parse.Parser``; *logger* is only used for debug output.
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Store the grammar, converter and logger for later parses.

        If *logger* is None, the root logger (``logging.getLogger()``)
        is used.
        """
        self.grammar = grammar
        self.convert = convert
        self.logger = logging.getLogger() if logger is None else logger

    @staticmethod
    def _position_after(value, end):
        """Return the (lineno, column) at which the next token may start.

        A token whose text ends with a newline moves the position to
        column 0 of the following line; otherwise it is simply *end*.
        """
        lineno, column = end
        if value.endswith("\n"):
            return lineno + 1, 0
        return lineno, column

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        *tokens* yields 5-tuples ``(type, value, start, end, line_text)``.
        Text found between tokens, as well as COMMENT and NL tokens, is
        collected into a prefix string that is handed to the parser
        together with the next significant token.  With *debug* true,
        every token passed to the parser is logged.

        Raises ``parse.ParseError`` if the tokens run out before the
        parser reports completion.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        parser = parse.Parser(self.grammar, self.convert)
        parser.setup()
        lineno, column = 1, 0
        tok_type = value = start = end = line_text = None
        prefix = u""
        for tok_type, value, start, end, line_text in tokens:
            expected = (lineno, column)
            if start != expected:
                # There is a gap between the previous token and this one;
                # reconstruct the skipped text and add it to the prefix.
                assert expected <= start, (expected, start)
                start_lineno, start_column = start
                if start_lineno > lineno:
                    prefix += "\n" * (start_lineno - lineno)
                    lineno, column = start_lineno, 0
                if start_column > column:
                    prefix += line_text[column:start_column]
                    column = start_column
            if tok_type in (tokenize.COMMENT, tokenize.NL):
                # Not passed to the parser: it only contributes to the prefix.
                prefix += value
                lineno, column = self._position_after(value, end)
                continue
            if tok_type == token.OP:
                # Replace the generic OP type by the operator's own type.
                tok_type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[tok_type], value, prefix)
            if parser.addtoken(tok_type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = self._position_after(value, end)
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   tok_type, value, (prefix, start))
        return parser.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        The stream's ``readline`` method is the token generator's input.
        """
        token_source = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(token_source, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        Delegates to parse_stream_raw().
        """
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree.

        The file is opened with ``codecs.open`` using *encoding* and is
        closed again whether or not parsing succeeds.
        """
        with codecs.open(filename, "r", encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        line_reader = StringIO.StringIO(text).readline
        return self.parse_tokens(tokenize.generate_tokens(line_reader), debug)
108
109
def _generate_pickle_name(gt):
    """Return the pickle file name that belongs to grammar file *gt*.

    A ``.txt`` extension is dropped (any other extension is kept), then
    the dot-joined fields of ``sys.version_info`` and ``.pickle`` are
    appended.
    """
    stem, ext = os.path.splitext(gt)
    if ext != ".txt":
        stem += ext
    version_tag = ".".join(str(field) for field in sys.version_info)
    return stem + version_tag + ".pickle"
115
116
def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle).

    *gt* is the grammar text file and *gp* the pickle file; when *gp* is
    None its name is derived from *gt*.  The pickle is loaded when it is
    at least as recent as *gt* and *force* is false.  Otherwise the tables
    are generated from *gt* and, if *save* is true, written to *gp*; a
    failed write is only logged.  When *logger* is None the root logger
    is used.
    """
    if logger is None:
        logger = logging.getLogger()
    if gp is None:
        gp = _generate_pickle_name(gt)

    # Fast path: an up-to-date pickle exists and regeneration isn't forced.
    if not force and _newer(gp, gt):
        tables = grammar.Grammar()
        tables.load(gp)
        return tables

    logger.info("Generating grammar tables from %s", gt)
    tables = pgen.generate_grammar(gt)
    if save:
        logger.info("Writing grammar tables to %s", gp)
        try:
            tables.dump(gp)
        except IOError as e:
            # Best effort only: the freshly generated tables are still usable.
            logger.info("Writing failed: %s", e)
    return tables
136
137
def _newer(a, b):
    """Inquire whether file a was written since file b.

    Returns False when *a* does not exist and True when *a* exists but
    *b* does not; otherwise reports whether *a*'s modification time is
    at least *b*'s.
    """
    if os.path.exists(a):
        if os.path.exists(b):
            mtime_a = os.path.getmtime(a)
            mtime_b = os.path.getmtime(b)
            return mtime_a >= mtime_b
        return True
    return False
Ned Deily10fc1042012-09-08 18:47:27 -0700145
146
def load_packaged_grammar(package, grammar_source):
    """Load a grammar from package data, or from *grammar_source* itself.

    If *grammar_source* is an existing file, load_grammar(grammar_source)
    is called, which keeps load_grammar's automatic regeneration
    behavior.

    Otherwise the pickled grammar is read with
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from the base name of
    *grammar_source* by adding the Python version and using a
    ``.pickle`` extension.

    """
    if not os.path.isfile(grammar_source):
        source_basename = os.path.basename(grammar_source)
        payload = pkgutil.get_data(package,
                                   _generate_pickle_name(source_basename))
        tables = grammar.Grammar()
        tables.loads(payload)
        return tables
    return load_grammar(grammar_source)
165
166
def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Each argument is a path to a grammar text file; without arguments
    ``sys.argv[1:]`` is used.  load_grammar is called for every path with
    regeneration forced and saving enabled.  Always returns True.
    """
    grammar_paths = args or sys.argv[1:]
    logging.basicConfig(format='%(message)s', stream=sys.stdout,
                        level=logging.INFO)
    for grammar_path in grammar_paths:
        load_grammar(grammar_path, save=True, force=True)
    return True
179
if __name__ == "__main__":
    # Exit status 0 when main() reports success, 1 otherwise.
    succeeded = main()
    sys.exit(0 if succeeded else 1)