blob: 294c534d0ff469244ed2b4138876db96c492c0c6 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* Parser implementation */
2
3/* For a description, see the comments at end of this file */
4
5/* XXX To do: error recovery */
6
7#include <stdio.h>
8#include "assert.h"
9
10#include "PROTO.h"
11#include "malloc.h"
12#include "token.h"
13#include "grammar.h"
14#include "node.h"
15#include "parser.h"
16#include "errcode.h"
17
18extern int debugging;
19
20#ifdef DEBUG
21#define D(x) if (!debugging); else x
22#else
23#define D(x)
24#endif
25
26
27/* STACK DATA TYPE */
28
29static void s_reset PROTO((stack *));
30
31static void
32s_reset(s)
33 stack *s;
34{
35 s->s_top = &s->s_base[MAXSTACK];
36}
37
38#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
39
40static int s_push PROTO((stack *, dfa *, node *));
41
42static int
43s_push(s, d, parent)
44 register stack *s;
45 dfa *d;
46 node *parent;
47{
48 register stackentry *top;
49 if (s->s_top == s->s_base) {
50 fprintf(stderr, "s_push: parser stack overflow\n");
51 return -1;
52 }
53 top = --s->s_top;
54 top->s_dfa = d;
55 top->s_parent = parent;
56 top->s_state = 0;
57 return 0;
58}
59
60#ifdef DEBUG
61
62static void s_pop PROTO((stack *));
63
64static void
65s_pop(s)
66 register stack *s;
67{
68 if (s_empty(s)) {
69 fprintf(stderr, "s_pop: parser stack underflow -- FATAL\n");
70 abort();
71 }
72 s->s_top++;
73}
74
75#else /* !DEBUG */
76
77#define s_pop(s) (s)->s_top++
78
79#endif
80
81
82/* PARSER CREATION */
83
84parser_state *
85newparser(g, start)
86 grammar *g;
87 int start;
88{
89 parser_state *ps;
90
91 if (!g->g_accel)
92 addaccelerators(g);
93 ps = NEW(parser_state, 1);
94 if (ps == NULL)
95 return NULL;
96 ps->p_grammar = g;
97 ps->p_tree = newnode(start);
98 if (ps->p_tree == NULL) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099 DEL(ps);
100 return NULL;
101 }
102 s_reset(&ps->p_stack);
103 (void) s_push(&ps->p_stack, finddfa(g, start), ps->p_tree);
104 return ps;
105}
106
107void
108delparser(ps)
109 parser_state *ps;
110{
Guido van Rossum99f02d41990-11-18 17:38:42 +0000111 /* NB If you want to save the parse tree,
112 you must set p_tree to NULL before calling delparser! */
113 freenode(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000114 DEL(ps);
115}
116
117
118/* PARSER STACK OPERATIONS */
119
120static int shift PROTO((stack *, int, char *, int));
121
122static int
123shift(s, type, str, newstate)
124 register stack *s;
125 int type;
126 char *str;
127 int newstate;
128{
129 assert(!s_empty(s));
130 if (addchild(s->s_top->s_parent, type, str) == NULL) {
131 fprintf(stderr, "shift: no mem in addchild\n");
132 return -1;
133 }
134 s->s_top->s_state = newstate;
135 return 0;
136}
137
138static int push PROTO((stack *, int, dfa *, int));
139
140static int
141push(s, type, d, newstate)
142 register stack *s;
143 int type;
144 dfa *d;
145 int newstate;
146{
147 register node *n;
148 n = s->s_top->s_parent;
149 assert(!s_empty(s));
150 if (addchild(n, type, (char *)NULL) == NULL) {
151 fprintf(stderr, "push: no mem in addchild\n");
152 return -1;
153 }
154 s->s_top->s_state = newstate;
155 return s_push(s, d, CHILD(n, NCH(n)-1));
156}
157
158
159/* PARSER PROPER */
160
161static int classify PROTO((grammar *, int, char *));
162
163static int
164classify(g, type, str)
165 grammar *g;
166 register int type;
167 char *str;
168{
169 register int n = g->g_ll.ll_nlabels;
170
171 if (type == NAME) {
172 register char *s = str;
173 register label *l = g->g_ll.ll_label;
174 register int i;
175 for (i = n; i > 0; i--, l++) {
176 if (l->lb_type == NAME && l->lb_str != NULL &&
177 l->lb_str[0] == s[0] &&
178 strcmp(l->lb_str, s) == 0) {
179 D(printf("It's a keyword\n"));
180 return n - i;
181 }
182 }
183 }
184
185 {
186 register label *l = g->g_ll.ll_label;
187 register int i;
188 for (i = n; i > 0; i--, l++) {
189 if (l->lb_type == type && l->lb_str == NULL) {
190 D(printf("It's a token we know\n"));
191 return n - i;
192 }
193 }
194 }
195
196 D(printf("Illegal token\n"));
197 return -1;
198}
199
200int
201addtoken(ps, type, str)
202 register parser_state *ps;
203 register int type;
204 char *str;
205{
206 register int ilabel;
207
208 D(printf("Token %s/'%s' ... ", tok_name[type], str));
209
210 /* Find out which label this token is */
211 ilabel = classify(ps->p_grammar, type, str);
212 if (ilabel < 0)
213 return E_SYNTAX;
214
215 /* Loop until the token is shifted or an error occurred */
216 for (;;) {
217 /* Fetch the current dfa and state */
218 register dfa *d = ps->p_stack.s_top->s_dfa;
219 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
220
221 D(printf(" DFA '%s', state %d:",
222 d->d_name, ps->p_stack.s_top->s_state));
223
224 /* Check accelerator */
225 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
226 register int x = s->s_accel[ilabel - s->s_lower];
227 if (x != -1) {
228 if (x & (1<<7)) {
229 /* Push non-terminal */
230 int nt = (x >> 8) + NT_OFFSET;
231 int arrow = x & ((1<<7)-1);
232 dfa *d1 = finddfa(ps->p_grammar, nt);
233 if (push(&ps->p_stack, nt, d1, arrow) < 0) {
234 D(printf(" MemError: push.\n"));
235 return E_NOMEM;
236 }
237 D(printf(" Push ...\n"));
238 continue;
239 }
240
241 /* Shift the token */
242 if (shift(&ps->p_stack, type, str, x) < 0) {
243 D(printf(" MemError: shift.\n"));
244 return E_NOMEM;
245 }
246 D(printf(" Shift.\n"));
247 /* Pop while we are in an accept-only state */
248 while (s = &d->d_state
249 [ps->p_stack.s_top->s_state],
250 s->s_accept && s->s_narcs == 1) {
251 D(printf(" Direct pop.\n"));
252 s_pop(&ps->p_stack);
253 if (s_empty(&ps->p_stack)) {
254 D(printf(" ACCEPT.\n"));
255 return E_DONE;
256 }
257 d = ps->p_stack.s_top->s_dfa;
258 }
259 return E_OK;
260 }
261 }
262
263 if (s->s_accept) {
264 /* Pop this dfa and try again */
265 s_pop(&ps->p_stack);
266 D(printf(" Pop ...\n"));
267 if (s_empty(&ps->p_stack)) {
268 D(printf(" Error: bottom of stack.\n"));
269 return E_SYNTAX;
270 }
271 continue;
272 }
273
274 /* Stuck, report syntax error */
275 D(printf(" Error.\n"));
276 return E_SYNTAX;
277 }
278}
279
280
281#ifdef DEBUG
282
283/* DEBUG OUTPUT */
284
285void
286dumptree(g, n)
287 grammar *g;
288 node *n;
289{
290 int i;
291
292 if (n == NULL)
293 printf("NIL");
294 else {
295 label l;
296 l.lb_type = TYPE(n);
297 l.lb_str = TYPE(str);
298 printf("%s", labelrepr(&l));
299 if (ISNONTERMINAL(TYPE(n))) {
300 printf("(");
301 for (i = 0; i < NCH(n); i++) {
302 if (i > 0)
303 printf(",");
304 dumptree(g, CHILD(n, i));
305 }
306 printf(")");
307 }
308 }
309}
310
311void
312showtree(g, n)
313 grammar *g;
314 node *n;
315{
316 int i;
317
318 if (n == NULL)
319 return;
320 if (ISNONTERMINAL(TYPE(n))) {
321 for (i = 0; i < NCH(n); i++)
322 showtree(g, CHILD(n, i));
323 }
324 else if (ISTERMINAL(TYPE(n))) {
325 printf("%s", tok_name[TYPE(n)]);
326 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
327 printf("(%s)", STR(n));
328 printf(" ");
329 }
330 else
331 printf("? ");
332}
333
334void
335printtree(ps)
336 parser_state *ps;
337{
338 if (debugging) {
339 printf("Parse tree:\n");
340 dumptree(ps->p_grammar, ps->p_tree);
341 printf("\n");
342 printf("Tokens:\n");
343 showtree(ps->p_grammar, ps->p_tree);
344 printf("\n");
345 }
346 printf("Listing:\n");
347 listtree(ps->p_tree);
348 printf("\n");
349}
350
351#endif /* DEBUG */
352
353/*
354
355Description
356-----------
357
358The parser's interface is different than usual: the function addtoken()
359must be called for each token in the input. This makes it possible to
360turn it into an incremental parsing system later. The parsing system
361constructs a parse tree as it goes.
362
363A parsing rule is represented as a Deterministic Finite-state Automaton
364(DFA). A node in a DFA represents a state of the parser; an arc represents
365a transition. Transitions are either labeled with terminal symbols or
366with non-terminals. When the parser decides to follow an arc labeled
367with a non-terminal, it is invoked recursively with the DFA representing
368the parsing rule for that as its initial state; when that DFA accepts,
369the parser that invoked it continues. The parse tree constructed by the
370recursively called parser is inserted as a child in the current parse tree.
371
372The DFA's can be constructed automatically from a more conventional
373language description. An extended LL(1) grammar (ELL(1)) is suitable.
374Certain restrictions make the parser's life easier: rules that can produce
375the empty string should be outlawed (there are other ways to put loops
376or optional parts in the language). To avoid the need to construct
377FIRST sets, we can require that all but the last alternative of a rule
378(really: arc going out of a DFA's state) must begin with a terminal
379symbol.
380
381As an example, consider this grammar:
382
383expr: term (OP term)*
384term: CONSTANT | '(' expr ')'
385
386The DFA corresponding to the rule for expr is:
387
388------->.---term-->.------->
389 ^ |
390 | |
391 \----OP----/
392
393The parse tree generated for the input a+b is:
394
395(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
396
397*/