blob: fb1a05152a60c8880dd782c94e20e26de0389667 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* Parser implementation */
12
13/* For a description, see the comments at end of this file */
14
15/* XXX To do: error recovery */
16
Guido van Rossum3f5da241990-12-20 15:06:42 +000017#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000018#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000019#include "token.h"
20#include "grammar.h"
21#include "node.h"
22#include "parser.h"
23#include "errcode.h"
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025
Guido van Rossum408027e1996-12-30 16:17:54 +000026#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000027extern int Py_DebugFlag;
28#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000029#else
30#define D(x)
31#endif
32
33
34/* STACK DATA TYPE */
35
Guido van Rossum86bea461997-04-29 21:03:06 +000036static void s_reset Py_PROTO((stack *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037
38static void
39s_reset(s)
40 stack *s;
41{
42 s->s_top = &s->s_base[MAXSTACK];
43}
44
45#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
46
Guido van Rossum86bea461997-04-29 21:03:06 +000047static int s_push Py_PROTO((stack *, dfa *, node *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000048
49static int
50s_push(s, d, parent)
51 register stack *s;
52 dfa *d;
53 node *parent;
54{
55 register stackentry *top;
56 if (s->s_top == s->s_base) {
57 fprintf(stderr, "s_push: parser stack overflow\n");
58 return -1;
59 }
60 top = --s->s_top;
61 top->s_dfa = d;
62 top->s_parent = parent;
63 top->s_state = 0;
64 return 0;
65}
66
Guido van Rossum408027e1996-12-30 16:17:54 +000067#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000068
Guido van Rossum86bea461997-04-29 21:03:06 +000069static void s_pop Py_PROTO((stack *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000070
71static void
72s_pop(s)
73 register stack *s;
74{
Guido van Rossum588633d1994-12-30 15:46:02 +000075 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000076 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000077 s->s_top++;
78}
79
Guido van Rossum408027e1996-12-30 16:17:54 +000080#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000081
82#define s_pop(s) (s)->s_top++
83
84#endif
85
86
87/* PARSER CREATION */
88
89parser_state *
Guido van Rossum86bea461997-04-29 21:03:06 +000090PyParser_New(g, start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091 grammar *g;
92 int start;
93{
94 parser_state *ps;
95
96 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +000097 PyGrammar_AddAccelerators(g);
98 ps = PyMem_NEW(parser_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099 if (ps == NULL)
100 return NULL;
101 ps->p_grammar = g;
Guido van Rossum86bea461997-04-29 21:03:06 +0000102 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103 if (ps->p_tree == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000104 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105 return NULL;
106 }
107 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +0000108 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109 return ps;
110}
111
112void
Guido van Rossum86bea461997-04-29 21:03:06 +0000113PyParser_Delete(ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000114 parser_state *ps;
115{
Guido van Rossum99f02d41990-11-18 17:38:42 +0000116 /* NB If you want to save the parse tree,
117 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +0000118 PyNode_Free(ps->p_tree);
119 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120}
121
122
123/* PARSER STACK OPERATIONS */
124
Guido van Rossum86bea461997-04-29 21:03:06 +0000125static int shift Py_PROTO((stack *, int, char *, int, int));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126
127static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000128shift(s, type, str, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000129 register stack *s;
130 int type;
131 char *str;
132 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000133 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000134{
Jeremy Hylton94988062000-06-20 19:10:44 +0000135 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000137 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno);
138 if (err)
139 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140 s->s_top->s_state = newstate;
141 return 0;
142}
143
Guido van Rossum86bea461997-04-29 21:03:06 +0000144static int push Py_PROTO((stack *, int, dfa *, int, int));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145
146static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000147push(s, type, d, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148 register stack *s;
149 int type;
150 dfa *d;
151 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000152 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153{
Jeremy Hylton94988062000-06-20 19:10:44 +0000154 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000155 register node *n;
156 n = s->s_top->s_parent;
157 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000158 err = PyNode_AddChild(n, type, (char *)NULL, lineno);
159 if (err)
160 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000161 s->s_top->s_state = newstate;
162 return s_push(s, d, CHILD(n, NCH(n)-1));
163}
164
165
166/* PARSER PROPER */
167
Guido van Rossum86bea461997-04-29 21:03:06 +0000168static int classify Py_PROTO((grammar *, int, char *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169
170static int
171classify(g, type, str)
172 grammar *g;
173 register int type;
174 char *str;
175{
176 register int n = g->g_ll.ll_nlabels;
177
178 if (type == NAME) {
179 register char *s = str;
180 register label *l = g->g_ll.ll_label;
181 register int i;
182 for (i = n; i > 0; i--, l++) {
183 if (l->lb_type == NAME && l->lb_str != NULL &&
184 l->lb_str[0] == s[0] &&
185 strcmp(l->lb_str, s) == 0) {
186 D(printf("It's a keyword\n"));
187 return n - i;
188 }
189 }
190 }
191
192 {
193 register label *l = g->g_ll.ll_label;
194 register int i;
195 for (i = n; i > 0; i--, l++) {
196 if (l->lb_type == type && l->lb_str == NULL) {
197 D(printf("It's a token we know\n"));
198 return n - i;
199 }
200 }
201 }
202
203 D(printf("Illegal token\n"));
204 return -1;
205}
206
207int
Guido van Rossum86bea461997-04-29 21:03:06 +0000208PyParser_AddToken(ps, type, str, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000209 register parser_state *ps;
210 register int type;
211 char *str;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000212 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213{
214 register int ilabel;
Jeremy Hylton94988062000-06-20 19:10:44 +0000215 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000216
Guido van Rossum86bea461997-04-29 21:03:06 +0000217 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000218
219 /* Find out which label this token is */
220 ilabel = classify(ps->p_grammar, type, str);
221 if (ilabel < 0)
222 return E_SYNTAX;
223
224 /* Loop until the token is shifted or an error occurred */
225 for (;;) {
226 /* Fetch the current dfa and state */
227 register dfa *d = ps->p_stack.s_top->s_dfa;
228 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
229
230 D(printf(" DFA '%s', state %d:",
231 d->d_name, ps->p_stack.s_top->s_state));
232
233 /* Check accelerator */
234 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
235 register int x = s->s_accel[ilabel - s->s_lower];
236 if (x != -1) {
237 if (x & (1<<7)) {
238 /* Push non-terminal */
239 int nt = (x >> 8) + NT_OFFSET;
240 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000241 dfa *d1 = PyGrammar_FindDFA(
242 ps->p_grammar, nt);
Jeremy Hylton94988062000-06-20 19:10:44 +0000243 if ((err = push(&ps->p_stack, nt, d1,
244 arrow, lineno)) > 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000245 D(printf(" MemError: push\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000246 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000247 }
248 D(printf(" Push ...\n"));
249 continue;
250 }
251
252 /* Shift the token */
Jeremy Hylton94988062000-06-20 19:10:44 +0000253 if ((err = shift(&ps->p_stack, type, str,
254 x, lineno)) > 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000255 D(printf(" MemError: shift.\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000256 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000257 }
258 D(printf(" Shift.\n"));
259 /* Pop while we are in an accept-only state */
260 while (s = &d->d_state
261 [ps->p_stack.s_top->s_state],
262 s->s_accept && s->s_narcs == 1) {
263 D(printf(" Direct pop.\n"));
264 s_pop(&ps->p_stack);
265 if (s_empty(&ps->p_stack)) {
266 D(printf(" ACCEPT.\n"));
267 return E_DONE;
268 }
269 d = ps->p_stack.s_top->s_dfa;
270 }
271 return E_OK;
272 }
273 }
274
275 if (s->s_accept) {
276 /* Pop this dfa and try again */
277 s_pop(&ps->p_stack);
278 D(printf(" Pop ...\n"));
279 if (s_empty(&ps->p_stack)) {
280 D(printf(" Error: bottom of stack.\n"));
281 return E_SYNTAX;
282 }
283 continue;
284 }
285
286 /* Stuck, report syntax error */
287 D(printf(" Error.\n"));
288 return E_SYNTAX;
289 }
290}
291
292
Guido van Rossum408027e1996-12-30 16:17:54 +0000293#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000294
295/* DEBUG OUTPUT */
296
297void
298dumptree(g, n)
299 grammar *g;
300 node *n;
301{
302 int i;
303
304 if (n == NULL)
305 printf("NIL");
306 else {
307 label l;
308 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000309 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000310 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000311 if (ISNONTERMINAL(TYPE(n))) {
312 printf("(");
313 for (i = 0; i < NCH(n); i++) {
314 if (i > 0)
315 printf(",");
316 dumptree(g, CHILD(n, i));
317 }
318 printf(")");
319 }
320 }
321}
322
323void
324showtree(g, n)
325 grammar *g;
326 node *n;
327{
328 int i;
329
330 if (n == NULL)
331 return;
332 if (ISNONTERMINAL(TYPE(n))) {
333 for (i = 0; i < NCH(n); i++)
334 showtree(g, CHILD(n, i));
335 }
336 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000337 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
339 printf("(%s)", STR(n));
340 printf(" ");
341 }
342 else
343 printf("? ");
344}
345
346void
347printtree(ps)
348 parser_state *ps;
349{
Guido van Rossum86bea461997-04-29 21:03:06 +0000350 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000351 printf("Parse tree:\n");
352 dumptree(ps->p_grammar, ps->p_tree);
353 printf("\n");
354 printf("Tokens:\n");
355 showtree(ps->p_grammar, ps->p_tree);
356 printf("\n");
357 }
358 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000359 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000360 printf("\n");
361}
362
Guido van Rossum408027e1996-12-30 16:17:54 +0000363#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000364
365/*
366
367Description
368-----------
369
370The parser's interface is different than usual: the function addtoken()
371must be called for each token in the input. This makes it possible to
372turn it into an incremental parsing system later. The parsing system
373constructs a parse tree as it goes.
374
375A parsing rule is represented as a Deterministic Finite-state Automaton
376(DFA). A node in a DFA represents a state of the parser; an arc represents
377a transition. Transitions are either labeled with terminal symbols or
378with non-terminals. When the parser decides to follow an arc labeled
379with a non-terminal, it is invoked recursively with the DFA representing
380the parsing rule for that as its initial state; when that DFA accepts,
381the parser that invoked it continues. The parse tree constructed by the
382recursively called parser is inserted as a child in the current parse tree.
383
384The DFA's can be constructed automatically from a more conventional
385language description. An extended LL(1) grammar (ELL(1)) is suitable.
386Certain restrictions make the parser's life easier: rules that can produce
387the empty string should be outlawed (there are other ways to put loops
388or optional parts in the language). To avoid the need to construct
389FIRST sets, we can require that all but the last alternative of a rule
390(really: arc going out of a DFA's state) must begin with a terminal
391symbol.
392
393As an example, consider this grammar:
394
395expr: term (OP term)*
396term: CONSTANT | '(' expr ')'
397
398The DFA corresponding to the rule for expr is:
399
400------->.---term-->.------->
401 ^ |
402 | |
403 \----OP----/
404
405The parse tree generated for the input a+b is:
406
407(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
408
409*/