# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.

"""
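
# The grammar table is normally produced by pgen and loaded through the
# companion driver module before a Parser is created.  A minimal loading
# sketch (assumptions: this module's package is importable as pgen2 and its
# driver module provides load_grammar(); the package and file names here
# are illustrative only):
#
#   from pgen2 import driver
#   grammar = driver.load_grammar("Grammar.txt")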

# Get a usable 'set' constructor
try:
    set
except NameError:
    from sets import Set as set

# Local imports
from . import token

class ParseError(Exception):
    """Exception to signal the parser is stuck."""

    def __init__(self, msg, type, value, context):
        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
                           (msg, type, value, context))
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context

class Parser(object):
34 """Parser engine.
35
36 The proper usage sequence is:
37
38 p = Parser(grammar, [converter]) # create instance
39 p.setup([start]) # prepare for parsing
40 <for each input token>:
41 if p.addtoken(...): # parse a token; may raise ParseError
42 break
43 root = p.rootnode # root of abstract syntax tree
44
45 A Parser instance may be reused by calling setup() repeatedly.
46
47 A Parser instance contains state pertaining to the current token
48 sequence, and should not be used concurrently by different threads
49 to parse separate token sequences.
50
51 See driver.py for how to get input tokens by tokenizing a file or
52 string.
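
    A more concrete variant of the same loop, as an illustrative sketch
    only: it assumes token 5-tuples of (type, value, start, end, line) in
    the style of the standard tokenize module, and a grammar object loaded
    elsewhere.  Tokens the grammar has no label for would make classify()
    raise ParseError, so a real driver filters its token stream first.

    p = Parser(grammar)
    p.setup()
    for type, value, start, end, line in tokens:
        if p.addtoken(type, value, start):    # context: a (lineno, column) pair
            break
    root = p.rootnode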

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable. When a syntax error occurs, addtoken() raises
    the ParseError exception. There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar, convert=None):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes. If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree. If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted. The syntax tree is converted from the bottom
        up.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.
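
        A minimal converter sketch (illustrative only; the bottom-up calls
        are made by shift() and pop() below, and whatever is produced for
        the start symbol becomes rootnode, which must accept attribute
        assignment in pop()):

            class ExampleNode(object):
                def __init__(self, type, children):
                    self.type = type
                    self.children = children

            def convert(grammar, node):
                type, value, context, children = node
                if children is None:
                    return value                    # token: keep its string
                return ExampleNode(type, children)  # symbol: wrap its children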

        """
        self.grammar = grammar
        self.convert = convert or (lambda grammar, node: node)

    def setup(self, start=None):
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.
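
        An illustrative reuse sketch (the symbol name is hypothetical, and
        the symbol2number mapping is assumed from the companion grammar
        module; a start symbol is identified by its number):

            p.setup()                                    # default start symbol
            # ... feed the first program's tokens via addtoken() ...
            p.setup(grammar.symbol2number["eval_input"]) # reset with another start
            # ... feed the next program's tokens ...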

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = [stackentry]
        self.rootnode = None
        self.used_names = set() # Aliased to self.rootnode.used_names in pop()

    def addtoken(self, type, value, context):
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabel = self.classify(type, value, context)
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = self.grammar.labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, self.grammar.dfas[t], newstate, context)
                        break # To continue the outer while loop
            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input",
                                         type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type, value, context):
        """Turn a token into a label. (Internal)"""
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            ilabel = self.grammar.keywords.get(value)
            if ilabel is not None:
                return ilabel
        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return ilabel

    def shift(self, type, value, newstate, context):
        """Shift a token. (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type, value, context, None)
        newnode = self.convert(self.grammar, newnode)
        if newnode is not None:
            node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def push(self, type, newdfa, newstate, context):
        """Push a nonterminal. (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type, None, context, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def pop(self):
        """Pop a nonterminal. (Internal)"""
        popdfa, popstate, popnode = self.stack.pop()
        newnode = self.convert(self.grammar, popnode)
        if newnode is not None:
            if self.stack:
                dfa, state, node = self.stack[-1]
                node[-1].append(newnode)
            else:
                self.rootnode = newnode
                self.rootnode.used_names = self.used_names