blob: b74de7fda5485412c489fb66b5f12917ce6a01e7 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* Parser implementation */
12
13/* For a description, see the comments at end of this file */
14
15/* XXX To do: error recovery */
16
Guido van Rossum3f5da241990-12-20 15:06:42 +000017#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000018#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000019#include "token.h"
20#include "grammar.h"
21#include "node.h"
22#include "parser.h"
23#include "errcode.h"
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025
Guido van Rossum408027e1996-12-30 16:17:54 +000026#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000027extern int Py_DebugFlag;
28#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000029#else
30#define D(x)
31#endif
32
33
34/* STACK DATA TYPE */
35
Tim Petersdbd9ba62000-07-09 03:09:57 +000036static void s_reset(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037
38static void
39s_reset(s)
40 stack *s;
41{
42 s->s_top = &s->s_base[MAXSTACK];
43}
44
45#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
46
Tim Petersdbd9ba62000-07-09 03:09:57 +000047static int s_push(stack *, dfa *, node *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000048
49static int
50s_push(s, d, parent)
51 register stack *s;
52 dfa *d;
53 node *parent;
54{
55 register stackentry *top;
56 if (s->s_top == s->s_base) {
57 fprintf(stderr, "s_push: parser stack overflow\n");
58 return -1;
59 }
60 top = --s->s_top;
61 top->s_dfa = d;
62 top->s_parent = parent;
63 top->s_state = 0;
64 return 0;
65}
66
Guido van Rossum408027e1996-12-30 16:17:54 +000067#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000068
Tim Petersdbd9ba62000-07-09 03:09:57 +000069static void s_pop(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000070
71static void
72s_pop(s)
73 register stack *s;
74{
Guido van Rossum588633d1994-12-30 15:46:02 +000075 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000076 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000077 s->s_top++;
78}
79
Guido van Rossum408027e1996-12-30 16:17:54 +000080#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000081
82#define s_pop(s) (s)->s_top++
83
84#endif
85
86
87/* PARSER CREATION */
88
89parser_state *
Guido van Rossum86bea461997-04-29 21:03:06 +000090PyParser_New(g, start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091 grammar *g;
92 int start;
93{
94 parser_state *ps;
95
96 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +000097 PyGrammar_AddAccelerators(g);
98 ps = PyMem_NEW(parser_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099 if (ps == NULL)
100 return NULL;
101 ps->p_grammar = g;
Guido van Rossum86bea461997-04-29 21:03:06 +0000102 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103 if (ps->p_tree == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000104 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105 return NULL;
106 }
107 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +0000108 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109 return ps;
110}
111
112void
Guido van Rossum86bea461997-04-29 21:03:06 +0000113PyParser_Delete(ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000114 parser_state *ps;
115{
Guido van Rossum99f02d41990-11-18 17:38:42 +0000116 /* NB If you want to save the parse tree,
117 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +0000118 PyNode_Free(ps->p_tree);
119 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120}
121
122
123/* PARSER STACK OPERATIONS */
124
Tim Petersdbd9ba62000-07-09 03:09:57 +0000125static int shift(stack *, int, char *, int, int);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126
127static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000128shift(s, type, str, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000129 register stack *s;
130 int type;
131 char *str;
132 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000133 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000134{
Jeremy Hylton94988062000-06-20 19:10:44 +0000135 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000137 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno);
138 if (err)
139 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140 s->s_top->s_state = newstate;
141 return 0;
142}
143
Tim Petersdbd9ba62000-07-09 03:09:57 +0000144static int push(stack *, int, dfa *, int, int);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145
146static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000147push(s, type, d, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148 register stack *s;
149 int type;
150 dfa *d;
151 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000152 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153{
Jeremy Hylton94988062000-06-20 19:10:44 +0000154 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000155 register node *n;
156 n = s->s_top->s_parent;
157 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000158 err = PyNode_AddChild(n, type, (char *)NULL, lineno);
159 if (err)
160 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000161 s->s_top->s_state = newstate;
162 return s_push(s, d, CHILD(n, NCH(n)-1));
163}
164
165
166/* PARSER PROPER */
167
Tim Petersdbd9ba62000-07-09 03:09:57 +0000168static int classify(grammar *, int, char *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169
170static int
171classify(g, type, str)
172 grammar *g;
173 register int type;
174 char *str;
175{
176 register int n = g->g_ll.ll_nlabels;
177
178 if (type == NAME) {
179 register char *s = str;
180 register label *l = g->g_ll.ll_label;
181 register int i;
182 for (i = n; i > 0; i--, l++) {
183 if (l->lb_type == NAME && l->lb_str != NULL &&
184 l->lb_str[0] == s[0] &&
185 strcmp(l->lb_str, s) == 0) {
186 D(printf("It's a keyword\n"));
187 return n - i;
188 }
189 }
190 }
191
192 {
193 register label *l = g->g_ll.ll_label;
194 register int i;
195 for (i = n; i > 0; i--, l++) {
196 if (l->lb_type == type && l->lb_str == NULL) {
197 D(printf("It's a token we know\n"));
198 return n - i;
199 }
200 }
201 }
202
203 D(printf("Illegal token\n"));
204 return -1;
205}
206
207int
Fred Drake85f36392000-07-11 17:53:00 +0000208PyParser_AddToken(ps, type, str, lineno, expected_ret)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000209 register parser_state *ps;
210 register int type;
211 char *str;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000212 int lineno;
Fred Drake85f36392000-07-11 17:53:00 +0000213 int *expected_ret;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000214{
215 register int ilabel;
Jeremy Hylton94988062000-06-20 19:10:44 +0000216 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000217
Guido van Rossum86bea461997-04-29 21:03:06 +0000218 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000219
220 /* Find out which label this token is */
221 ilabel = classify(ps->p_grammar, type, str);
222 if (ilabel < 0)
223 return E_SYNTAX;
224
225 /* Loop until the token is shifted or an error occurred */
226 for (;;) {
227 /* Fetch the current dfa and state */
228 register dfa *d = ps->p_stack.s_top->s_dfa;
229 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
230
231 D(printf(" DFA '%s', state %d:",
232 d->d_name, ps->p_stack.s_top->s_state));
233
234 /* Check accelerator */
235 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
236 register int x = s->s_accel[ilabel - s->s_lower];
237 if (x != -1) {
238 if (x & (1<<7)) {
239 /* Push non-terminal */
240 int nt = (x >> 8) + NT_OFFSET;
241 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000242 dfa *d1 = PyGrammar_FindDFA(
243 ps->p_grammar, nt);
Jeremy Hylton94988062000-06-20 19:10:44 +0000244 if ((err = push(&ps->p_stack, nt, d1,
245 arrow, lineno)) > 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000246 D(printf(" MemError: push\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000247 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000248 }
249 D(printf(" Push ...\n"));
250 continue;
251 }
252
253 /* Shift the token */
Jeremy Hylton94988062000-06-20 19:10:44 +0000254 if ((err = shift(&ps->p_stack, type, str,
255 x, lineno)) > 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256 D(printf(" MemError: shift.\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000257 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000258 }
259 D(printf(" Shift.\n"));
260 /* Pop while we are in an accept-only state */
261 while (s = &d->d_state
262 [ps->p_stack.s_top->s_state],
263 s->s_accept && s->s_narcs == 1) {
264 D(printf(" Direct pop.\n"));
265 s_pop(&ps->p_stack);
266 if (s_empty(&ps->p_stack)) {
267 D(printf(" ACCEPT.\n"));
268 return E_DONE;
269 }
270 d = ps->p_stack.s_top->s_dfa;
271 }
272 return E_OK;
273 }
274 }
275
276 if (s->s_accept) {
277 /* Pop this dfa and try again */
278 s_pop(&ps->p_stack);
279 D(printf(" Pop ...\n"));
280 if (s_empty(&ps->p_stack)) {
281 D(printf(" Error: bottom of stack.\n"));
282 return E_SYNTAX;
283 }
284 continue;
285 }
286
287 /* Stuck, report syntax error */
288 D(printf(" Error.\n"));
Fred Drake85f36392000-07-11 17:53:00 +0000289 if (expected_ret) {
290 if (s->s_lower == s->s_upper - 1) {
291 /* Only one possible expected token */
292 *expected_ret = ps->p_grammar->
293 g_ll.ll_label[s->s_lower].lb_type;
294 }
295 else
296 *expected_ret = -1;
297 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298 return E_SYNTAX;
299 }
300}
301
302
Guido van Rossum408027e1996-12-30 16:17:54 +0000303#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000304
305/* DEBUG OUTPUT */
306
307void
308dumptree(g, n)
309 grammar *g;
310 node *n;
311{
312 int i;
313
314 if (n == NULL)
315 printf("NIL");
316 else {
317 label l;
318 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000319 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000320 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000321 if (ISNONTERMINAL(TYPE(n))) {
322 printf("(");
323 for (i = 0; i < NCH(n); i++) {
324 if (i > 0)
325 printf(",");
326 dumptree(g, CHILD(n, i));
327 }
328 printf(")");
329 }
330 }
331}
332
333void
334showtree(g, n)
335 grammar *g;
336 node *n;
337{
338 int i;
339
340 if (n == NULL)
341 return;
342 if (ISNONTERMINAL(TYPE(n))) {
343 for (i = 0; i < NCH(n); i++)
344 showtree(g, CHILD(n, i));
345 }
346 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000347 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
349 printf("(%s)", STR(n));
350 printf(" ");
351 }
352 else
353 printf("? ");
354}
355
356void
357printtree(ps)
358 parser_state *ps;
359{
Guido van Rossum86bea461997-04-29 21:03:06 +0000360 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000361 printf("Parse tree:\n");
362 dumptree(ps->p_grammar, ps->p_tree);
363 printf("\n");
364 printf("Tokens:\n");
365 showtree(ps->p_grammar, ps->p_tree);
366 printf("\n");
367 }
368 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000369 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000370 printf("\n");
371}
372
Guido van Rossum408027e1996-12-30 16:17:54 +0000373#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000374
375/*
376
377Description
378-----------
379
380The parser's interface is different than usual: the function addtoken()
381must be called for each token in the input. This makes it possible to
382turn it into an incremental parsing system later. The parsing system
383constructs a parse tree as it goes.
384
385A parsing rule is represented as a Deterministic Finite-state Automaton
386(DFA). A node in a DFA represents a state of the parser; an arc represents
387a transition. Transitions are either labeled with terminal symbols or
388with non-terminals. When the parser decides to follow an arc labeled
389with a non-terminal, it is invoked recursively with the DFA representing
390the parsing rule for that as its initial state; when that DFA accepts,
391the parser that invoked it continues. The parse tree constructed by the
392recursively called parser is inserted as a child in the current parse tree.
393
394The DFA's can be constructed automatically from a more conventional
395language description. An extended LL(1) grammar (ELL(1)) is suitable.
396Certain restrictions make the parser's life easier: rules that can produce
397the empty string should be outlawed (there are other ways to put loops
398or optional parts in the language). To avoid the need to construct
399FIRST sets, we can require that all but the last alternative of a rule
400(really: arc going out of a DFA's state) must begin with a terminal
401symbol.
402
403As an example, consider this grammar:
404
405expr: term (OP term)*
406term: CONSTANT | '(' expr ')'
407
408The DFA corresponding to the rule for expr is:
409
410------->.---term-->.------->
411 ^ |
412 | |
413 \----OP----/
414
415The parse tree generated for the input a+b is:
416
417(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
418
419*/