blob: 6eaa925ef3c5da21780ff863d2d2eba04e31fa4c [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Parser implementation */
3
4/* For a description, see the comments at end of this file */
5
6/* XXX To do: error recovery */
7
Guido van Rossum3f5da241990-12-20 15:06:42 +00008#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "token.h"
11#include "grammar.h"
12#include "node.h"
13#include "parser.h"
14#include "errcode.h"
15
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000016
Guido van Rossum408027e1996-12-30 16:17:54 +000017#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000018extern int Py_DebugFlag;
19#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000020#else
21#define D(x)
22#endif
23
24
25/* STACK DATA TYPE */
26
Tim Petersdbd9ba62000-07-09 03:09:57 +000027static void s_reset(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
29static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000030s_reset(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031{
32 s->s_top = &s->s_base[MAXSTACK];
33}
34
35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037static int
Thomas Wouters23c9e002000-07-22 19:20:54 +000038s_push(register stack *s, dfa *d, node *parent)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039{
40 register stackentry *top;
41 if (s->s_top == s->s_base) {
42 fprintf(stderr, "s_push: parser stack overflow\n");
Guido van Rossume3c3b272000-10-02 10:21:59 +000043 return E_NOMEM;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000044 }
45 top = --s->s_top;
46 top->s_dfa = d;
47 top->s_parent = parent;
48 top->s_state = 0;
49 return 0;
50}
51
Guido van Rossum408027e1996-12-30 16:17:54 +000052#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000055s_pop(register stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Guido van Rossum588633d1994-12-30 15:46:02 +000057 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000058 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000059 s->s_top++;
60}
61
Guido van Rossum408027e1996-12-30 16:17:54 +000062#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063
64#define s_pop(s) (s)->s_top++
65
66#endif
67
68
69/* PARSER CREATION */
70
71parser_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +000072PyParser_New(grammar *g, int start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073{
74 parser_state *ps;
75
76 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +000077 PyGrammar_AddAccelerators(g);
78 ps = PyMem_NEW(parser_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000079 if (ps == NULL)
80 return NULL;
81 ps->p_grammar = g;
Guido van Rossum86bea461997-04-29 21:03:06 +000082 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000083 if (ps->p_tree == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +000084 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000085 return NULL;
86 }
87 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +000088 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000089 return ps;
90}
91
92void
Thomas Wouters23c9e002000-07-22 19:20:54 +000093PyParser_Delete(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Guido van Rossum99f02d41990-11-18 17:38:42 +000095 /* NB If you want to save the parse tree,
96 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +000097 PyNode_Free(ps->p_tree);
98 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
101
102/* PARSER STACK OPERATIONS */
103
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000104static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000105shift(register stack *s, int type, char *str, int newstate, int lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Jeremy Hylton94988062000-06-20 19:10:44 +0000107 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000108 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000109 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno);
110 if (err)
111 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112 s->s_top->s_state = newstate;
113 return 0;
114}
115
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000116static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000117push(register stack *s, int type, dfa *d, int newstate, int lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000118{
Jeremy Hylton94988062000-06-20 19:10:44 +0000119 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120 register node *n;
121 n = s->s_top->s_parent;
122 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000123 err = PyNode_AddChild(n, type, (char *)NULL, lineno);
124 if (err)
125 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126 s->s_top->s_state = newstate;
127 return s_push(s, d, CHILD(n, NCH(n)-1));
128}
129
130
131/* PARSER PROPER */
132
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000133static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000134classify(grammar *g, int type, char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000135{
136 register int n = g->g_ll.ll_nlabels;
137
138 if (type == NAME) {
139 register char *s = str;
140 register label *l = g->g_ll.ll_label;
141 register int i;
142 for (i = n; i > 0; i--, l++) {
143 if (l->lb_type == NAME && l->lb_str != NULL &&
144 l->lb_str[0] == s[0] &&
145 strcmp(l->lb_str, s) == 0) {
146 D(printf("It's a keyword\n"));
147 return n - i;
148 }
149 }
150 }
151
152 {
153 register label *l = g->g_ll.ll_label;
154 register int i;
155 for (i = n; i > 0; i--, l++) {
156 if (l->lb_type == type && l->lb_str == NULL) {
157 D(printf("It's a token we know\n"));
158 return n - i;
159 }
160 }
161 }
162
163 D(printf("Illegal token\n"));
164 return -1;
165}
166
167int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000168PyParser_AddToken(register parser_state *ps, register int type, char *str,
169 int lineno, int *expected_ret)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000170{
171 register int ilabel;
Jeremy Hylton94988062000-06-20 19:10:44 +0000172 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000173
Guido van Rossum86bea461997-04-29 21:03:06 +0000174 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000175
176 /* Find out which label this token is */
177 ilabel = classify(ps->p_grammar, type, str);
178 if (ilabel < 0)
179 return E_SYNTAX;
180
181 /* Loop until the token is shifted or an error occurred */
182 for (;;) {
183 /* Fetch the current dfa and state */
184 register dfa *d = ps->p_stack.s_top->s_dfa;
185 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
186
187 D(printf(" DFA '%s', state %d:",
188 d->d_name, ps->p_stack.s_top->s_state));
189
190 /* Check accelerator */
191 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
192 register int x = s->s_accel[ilabel - s->s_lower];
193 if (x != -1) {
194 if (x & (1<<7)) {
195 /* Push non-terminal */
196 int nt = (x >> 8) + NT_OFFSET;
197 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000198 dfa *d1 = PyGrammar_FindDFA(
199 ps->p_grammar, nt);
Jeremy Hylton94988062000-06-20 19:10:44 +0000200 if ((err = push(&ps->p_stack, nt, d1,
201 arrow, lineno)) > 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000202 D(printf(" MemError: push\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000203 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000204 }
205 D(printf(" Push ...\n"));
206 continue;
207 }
208
209 /* Shift the token */
Jeremy Hylton94988062000-06-20 19:10:44 +0000210 if ((err = shift(&ps->p_stack, type, str,
211 x, lineno)) > 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000212 D(printf(" MemError: shift.\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000213 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000214 }
215 D(printf(" Shift.\n"));
216 /* Pop while we are in an accept-only state */
217 while (s = &d->d_state
218 [ps->p_stack.s_top->s_state],
219 s->s_accept && s->s_narcs == 1) {
220 D(printf(" Direct pop.\n"));
221 s_pop(&ps->p_stack);
222 if (s_empty(&ps->p_stack)) {
223 D(printf(" ACCEPT.\n"));
224 return E_DONE;
225 }
226 d = ps->p_stack.s_top->s_dfa;
227 }
228 return E_OK;
229 }
230 }
231
232 if (s->s_accept) {
233 /* Pop this dfa and try again */
234 s_pop(&ps->p_stack);
235 D(printf(" Pop ...\n"));
236 if (s_empty(&ps->p_stack)) {
237 D(printf(" Error: bottom of stack.\n"));
238 return E_SYNTAX;
239 }
240 continue;
241 }
242
243 /* Stuck, report syntax error */
244 D(printf(" Error.\n"));
Fred Drake85f36392000-07-11 17:53:00 +0000245 if (expected_ret) {
246 if (s->s_lower == s->s_upper - 1) {
247 /* Only one possible expected token */
248 *expected_ret = ps->p_grammar->
249 g_ll.ll_label[s->s_lower].lb_type;
250 }
251 else
252 *expected_ret = -1;
253 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254 return E_SYNTAX;
255 }
256}
257
258
Guido van Rossum408027e1996-12-30 16:17:54 +0000259#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260
261/* DEBUG OUTPUT */
262
263void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000264dumptree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265{
266 int i;
267
268 if (n == NULL)
269 printf("NIL");
270 else {
271 label l;
272 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000273 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000274 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000275 if (ISNONTERMINAL(TYPE(n))) {
276 printf("(");
277 for (i = 0; i < NCH(n); i++) {
278 if (i > 0)
279 printf(",");
280 dumptree(g, CHILD(n, i));
281 }
282 printf(")");
283 }
284 }
285}
286
287void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000288showtree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000289{
290 int i;
291
292 if (n == NULL)
293 return;
294 if (ISNONTERMINAL(TYPE(n))) {
295 for (i = 0; i < NCH(n); i++)
296 showtree(g, CHILD(n, i));
297 }
298 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000299 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
301 printf("(%s)", STR(n));
302 printf(" ");
303 }
304 else
305 printf("? ");
306}
307
308void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000309printtree(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000310{
Guido van Rossum86bea461997-04-29 21:03:06 +0000311 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000312 printf("Parse tree:\n");
313 dumptree(ps->p_grammar, ps->p_tree);
314 printf("\n");
315 printf("Tokens:\n");
316 showtree(ps->p_grammar, ps->p_tree);
317 printf("\n");
318 }
319 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000320 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000321 printf("\n");
322}
323
Guido van Rossum408027e1996-12-30 16:17:54 +0000324#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000325
326/*
327
328Description
329-----------
330
331The parser's interface is different than usual: the function addtoken()
332must be called for each token in the input. This makes it possible to
333turn it into an incremental parsing system later. The parsing system
334constructs a parse tree as it goes.
335
336A parsing rule is represented as a Deterministic Finite-state Automaton
337(DFA). A node in a DFA represents a state of the parser; an arc represents
338a transition. Transitions are either labeled with terminal symbols or
339with non-terminals. When the parser decides to follow an arc labeled
340with a non-terminal, it is invoked recursively with the DFA representing
341the parsing rule for that as its initial state; when that DFA accepts,
342the parser that invoked it continues. The parse tree constructed by the
343recursively called parser is inserted as a child in the current parse tree.
344
345The DFA's can be constructed automatically from a more conventional
346language description. An extended LL(1) grammar (ELL(1)) is suitable.
347Certain restrictions make the parser's life easier: rules that can produce
348the empty string should be outlawed (there are other ways to put loops
349or optional parts in the language). To avoid the need to construct
350FIRST sets, we can require that all but the last alternative of a rule
351(really: arc going out of a DFA's state) must begin with a terminal
352symbol.
353
354As an example, consider this grammar:
355
356expr: term (OP term)*
357term: CONSTANT | '(' expr ')'
358
359The DFA corresponding to the rule for expr is:
360
361------->.---term-->.------->
362 ^ |
363 | |
364 \----OP----/
365
366The parse tree generated for the input a+b is:
367
368(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
369
370*/