blob: e6fc180ee6fe15b5ff78f84ad9bc192cd2c1ba3a [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumb9f8d6e1995-01-04 19:08:09 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* Parser implementation */
33
34/* For a description, see the comments at end of this file */
35
36/* XXX To do: error recovery */
37
Guido van Rossum3f5da241990-12-20 15:06:42 +000038#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000040#include "token.h"
41#include "grammar.h"
42#include "node.h"
43#include "parser.h"
44#include "errcode.h"
45
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046
47#ifdef DEBUG
Guido van Rossum3f5da241990-12-20 15:06:42 +000048extern int debugging;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000049#define D(x) if (!debugging); else x
50#else
51#define D(x)
52#endif
53
54
55/* STACK DATA TYPE */
56
57static void s_reset PROTO((stack *));
58
59static void
60s_reset(s)
61 stack *s;
62{
63 s->s_top = &s->s_base[MAXSTACK];
64}
65
66#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
67
68static int s_push PROTO((stack *, dfa *, node *));
69
70static int
71s_push(s, d, parent)
72 register stack *s;
73 dfa *d;
74 node *parent;
75{
76 register stackentry *top;
77 if (s->s_top == s->s_base) {
78 fprintf(stderr, "s_push: parser stack overflow\n");
79 return -1;
80 }
81 top = --s->s_top;
82 top->s_dfa = d;
83 top->s_parent = parent;
84 top->s_state = 0;
85 return 0;
86}
87
88#ifdef DEBUG
89
90static void s_pop PROTO((stack *));
91
92static void
93s_pop(s)
94 register stack *s;
95{
Guido van Rossum588633d1994-12-30 15:46:02 +000096 if (s_empty(s))
97 fatal("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098 s->s_top++;
99}
100
101#else /* !DEBUG */
102
103#define s_pop(s) (s)->s_top++
104
105#endif
106
107
108/* PARSER CREATION */
109
110parser_state *
111newparser(g, start)
112 grammar *g;
113 int start;
114{
115 parser_state *ps;
116
117 if (!g->g_accel)
118 addaccelerators(g);
119 ps = NEW(parser_state, 1);
120 if (ps == NULL)
121 return NULL;
122 ps->p_grammar = g;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000123 ps->p_tree = newtree(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000124 if (ps->p_tree == NULL) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000125 DEL(ps);
126 return NULL;
127 }
128 s_reset(&ps->p_stack);
129 (void) s_push(&ps->p_stack, finddfa(g, start), ps->p_tree);
130 return ps;
131}
132
133void
134delparser(ps)
135 parser_state *ps;
136{
Guido van Rossum99f02d41990-11-18 17:38:42 +0000137 /* NB If you want to save the parse tree,
138 you must set p_tree to NULL before calling delparser! */
Guido van Rossum3f5da241990-12-20 15:06:42 +0000139 freetree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140 DEL(ps);
141}
142
143
144/* PARSER STACK OPERATIONS */
145
Guido van Rossum3f5da241990-12-20 15:06:42 +0000146static int shift PROTO((stack *, int, char *, int, int));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000147
148static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000149shift(s, type, str, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150 register stack *s;
151 int type;
152 char *str;
153 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000154 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000155{
156 assert(!s_empty(s));
Guido van Rossum3f5da241990-12-20 15:06:42 +0000157 if (addchild(s->s_top->s_parent, type, str, lineno) == NULL) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000158 fprintf(stderr, "shift: no mem in addchild\n");
159 return -1;
160 }
161 s->s_top->s_state = newstate;
162 return 0;
163}
164
Guido van Rossum3f5da241990-12-20 15:06:42 +0000165static int push PROTO((stack *, int, dfa *, int, int));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000166
167static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000168push(s, type, d, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169 register stack *s;
170 int type;
171 dfa *d;
172 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000173 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000174{
175 register node *n;
176 n = s->s_top->s_parent;
177 assert(!s_empty(s));
Guido van Rossum3f5da241990-12-20 15:06:42 +0000178 if (addchild(n, type, (char *)NULL, lineno) == NULL) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179 fprintf(stderr, "push: no mem in addchild\n");
180 return -1;
181 }
182 s->s_top->s_state = newstate;
183 return s_push(s, d, CHILD(n, NCH(n)-1));
184}
185
186
187/* PARSER PROPER */
188
189static int classify PROTO((grammar *, int, char *));
190
191static int
192classify(g, type, str)
193 grammar *g;
194 register int type;
195 char *str;
196{
197 register int n = g->g_ll.ll_nlabels;
198
199 if (type == NAME) {
200 register char *s = str;
201 register label *l = g->g_ll.ll_label;
202 register int i;
203 for (i = n; i > 0; i--, l++) {
204 if (l->lb_type == NAME && l->lb_str != NULL &&
205 l->lb_str[0] == s[0] &&
206 strcmp(l->lb_str, s) == 0) {
207 D(printf("It's a keyword\n"));
208 return n - i;
209 }
210 }
211 }
212
213 {
214 register label *l = g->g_ll.ll_label;
215 register int i;
216 for (i = n; i > 0; i--, l++) {
217 if (l->lb_type == type && l->lb_str == NULL) {
218 D(printf("It's a token we know\n"));
219 return n - i;
220 }
221 }
222 }
223
224 D(printf("Illegal token\n"));
225 return -1;
226}
227
228int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000229addtoken(ps, type, str, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000230 register parser_state *ps;
231 register int type;
232 char *str;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000233 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234{
235 register int ilabel;
236
237 D(printf("Token %s/'%s' ... ", tok_name[type], str));
238
239 /* Find out which label this token is */
240 ilabel = classify(ps->p_grammar, type, str);
241 if (ilabel < 0)
242 return E_SYNTAX;
243
244 /* Loop until the token is shifted or an error occurred */
245 for (;;) {
246 /* Fetch the current dfa and state */
247 register dfa *d = ps->p_stack.s_top->s_dfa;
248 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
249
250 D(printf(" DFA '%s', state %d:",
251 d->d_name, ps->p_stack.s_top->s_state));
252
253 /* Check accelerator */
254 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
255 register int x = s->s_accel[ilabel - s->s_lower];
256 if (x != -1) {
257 if (x & (1<<7)) {
258 /* Push non-terminal */
259 int nt = (x >> 8) + NT_OFFSET;
260 int arrow = x & ((1<<7)-1);
261 dfa *d1 = finddfa(ps->p_grammar, nt);
Guido van Rossum3f5da241990-12-20 15:06:42 +0000262 if (push(&ps->p_stack, nt, d1,
263 arrow, lineno) < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264 D(printf(" MemError: push.\n"));
265 return E_NOMEM;
266 }
267 D(printf(" Push ...\n"));
268 continue;
269 }
270
271 /* Shift the token */
Guido van Rossum3f5da241990-12-20 15:06:42 +0000272 if (shift(&ps->p_stack, type, str,
273 x, lineno) < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000274 D(printf(" MemError: shift.\n"));
275 return E_NOMEM;
276 }
277 D(printf(" Shift.\n"));
278 /* Pop while we are in an accept-only state */
279 while (s = &d->d_state
280 [ps->p_stack.s_top->s_state],
281 s->s_accept && s->s_narcs == 1) {
282 D(printf(" Direct pop.\n"));
283 s_pop(&ps->p_stack);
284 if (s_empty(&ps->p_stack)) {
285 D(printf(" ACCEPT.\n"));
286 return E_DONE;
287 }
288 d = ps->p_stack.s_top->s_dfa;
289 }
290 return E_OK;
291 }
292 }
293
294 if (s->s_accept) {
295 /* Pop this dfa and try again */
296 s_pop(&ps->p_stack);
297 D(printf(" Pop ...\n"));
298 if (s_empty(&ps->p_stack)) {
299 D(printf(" Error: bottom of stack.\n"));
300 return E_SYNTAX;
301 }
302 continue;
303 }
304
305 /* Stuck, report syntax error */
306 D(printf(" Error.\n"));
307 return E_SYNTAX;
308 }
309}
310
311
312#ifdef DEBUG
313
314/* DEBUG OUTPUT */
315
316void
317dumptree(g, n)
318 grammar *g;
319 node *n;
320{
321 int i;
322
323 if (n == NULL)
324 printf("NIL");
325 else {
326 label l;
327 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000328 l.lb_str = STR(n);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000329 printf("%s", labelrepr(&l));
330 if (ISNONTERMINAL(TYPE(n))) {
331 printf("(");
332 for (i = 0; i < NCH(n); i++) {
333 if (i > 0)
334 printf(",");
335 dumptree(g, CHILD(n, i));
336 }
337 printf(")");
338 }
339 }
340}
341
342void
343showtree(g, n)
344 grammar *g;
345 node *n;
346{
347 int i;
348
349 if (n == NULL)
350 return;
351 if (ISNONTERMINAL(TYPE(n))) {
352 for (i = 0; i < NCH(n); i++)
353 showtree(g, CHILD(n, i));
354 }
355 else if (ISTERMINAL(TYPE(n))) {
356 printf("%s", tok_name[TYPE(n)]);
357 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
358 printf("(%s)", STR(n));
359 printf(" ");
360 }
361 else
362 printf("? ");
363}
364
365void
366printtree(ps)
367 parser_state *ps;
368{
369 if (debugging) {
370 printf("Parse tree:\n");
371 dumptree(ps->p_grammar, ps->p_tree);
372 printf("\n");
373 printf("Tokens:\n");
374 showtree(ps->p_grammar, ps->p_tree);
375 printf("\n");
376 }
377 printf("Listing:\n");
378 listtree(ps->p_tree);
379 printf("\n");
380}
381
382#endif /* DEBUG */
383
384/*
385
386Description
387-----------
388
389The parser's interface is different than usual: the function addtoken()
390must be called for each token in the input. This makes it possible to
391turn it into an incremental parsing system later. The parsing system
392constructs a parse tree as it goes.
393
394A parsing rule is represented as a Deterministic Finite-state Automaton
395(DFA). A node in a DFA represents a state of the parser; an arc represents
396a transition. Transitions are either labeled with terminal symbols or
397with non-terminals. When the parser decides to follow an arc labeled
398with a non-terminal, it is invoked recursively with the DFA representing
399the parsing rule for that as its initial state; when that DFA accepts,
400the parser that invoked it continues. The parse tree constructed by the
401recursively called parser is inserted as a child in the current parse tree.
402
403The DFA's can be constructed automatically from a more conventional
404language description. An extended LL(1) grammar (ELL(1)) is suitable.
405Certain restrictions make the parser's life easier: rules that can produce
406the empty string should be outlawed (there are other ways to put loops
407or optional parts in the language). To avoid the need to construct
408FIRST sets, we can require that all but the last alternative of a rule
409(really: arc going out of a DFA's state) must begin with a terminal
410symbol.
411
412As an example, consider this grammar:
413
414expr: term (OP term)*
415term: CONSTANT | '(' expr ')'
416
417The DFA corresponding to the rule for expr is:
418
419------->.---term-->.------->
420 ^ |
421 | |
422 \----OP----/
423
424The parse tree generated for the input a+b is:
425
426(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
427
428*/