# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.

"""

# Local imports
from . import token

class ParseError(Exception):
    """Exception to signal the parser is stuck."""

    def __init__(self, msg, type, value, context):
        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
                           (msg, type, value, context))
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context

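    # Support pickling: Exception.__init__ above is called with a single
    # pre-formatted message, so the default __reduce__ would try to rebuild
    # the exception from that one argument and fail; re-supply all four.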
    def __reduce__(self):
        return type(self), (self.msg, self.type, self.value, self.context)

class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter]) # create instance
    p.setup([start]) # prepare for parsing
    <for each input token>:
        if p.addtoken(...): # parse a token; may raise ParseError
            break
    root = p.rootnode # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable. When a syntax error occurs, addtoken() raises
    the ParseError exception. There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar, convert=None):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes. If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree. If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted. The syntax tree is converted from the bottom
        up.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        self.convert = convert or (lambda grammar, node: node)

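    # A minimal sketch of a converter matching the contract described above,
    # assuming pytree-style Leaf/Node classes (the names below are
    # illustrative, not part of this module):
    #
    #     def convert(grammar, raw_node):
    #         type, value, context, children = raw_node
    #         if children or type in grammar.number2symbol:
    #             return Node(type, children, context=context)
    #         else:
    #             return Leaf(type, value, context=context)
    #
    # lib2to3's pytree.convert follows roughly this shape; see pytree.py.
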
    def setup(self, start=None):
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = [stackentry]
        self.rootnode = None
        self.used_names = set()  # Aliased to self.rootnode.used_names in pop()

    def addtoken(self, type, value, context):
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabel = self.classify(type, value, context)
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = self.grammar.labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, self.grammar.dfas[t], newstate, context)
                        break # To continue the outer while loop
            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input",
                                         type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type, value, context):
        """Turn a token into a label. (Internal)"""
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            ilabel = self.grammar.keywords.get(value)
            if ilabel is not None:
                return ilabel
        # Not a keyword: fall back to the generic label for this token type
        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return ilabel

    def shift(self, type, value, newstate, context):
        """Shift a token. (Internal)"""
        dfa, state, node = self.stack[-1]
        # Build a terminal node for the token and (optionally) convert it.
        newnode = (type, value, context, None)
        newnode = self.convert(self.grammar, newnode)
        if newnode is not None:
            # Append it to the children of the node under construction.
            node[-1].append(newnode)
        # Advance the current DFA to the new state.
        self.stack[-1] = (dfa, newstate, node)

    def push(self, type, newdfa, newstate, context):
        """Push a nonterminal. (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type, None, context, [])
        # Record where to resume in the current DFA, then start the
        # nonterminal's DFA in its initial state.
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def pop(self):
        """Pop a nonterminal. (Internal)"""
        popdfa, popstate, popnode = self.stack.pop()
        newnode = self.convert(self.grammar, popnode)
        if newnode is not None:
            if self.stack:
                # Attach the finished nonterminal to its parent's children.
                dfa, state, node = self.stack[-1]
                node[-1].append(newnode)
            else:
                # The start symbol was popped: parsing is complete.
                self.rootnode = newnode
                self.rootnode.used_names = self.used_names
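
# A minimal usage sketch (not part of this module). In practice the
# tokenizing and label bookkeeping are handled by driver.py, so the
# simplest way to exercise this Parser is through a Driver; "example.py"
# below is a hypothetical input file:
#
#     from lib2to3 import pygram, pytree
#     from lib2to3.pgen2 import driver
#
#     d = driver.Driver(pygram.python_grammar, convert=pytree.convert)
#     tree = d.parse_file("example.py")  # calls setup()/addtoken() internally
#     print(tree)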