blob: c3e5bc0608a5f9230e2ba36945cd8359d94e3f57 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumb9f8d6e1995-01-04 19:08:09 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007Copyright (c) 2000, BeOpen.com.
8Copyright (c) 1995-2000, Corporation for National Research Initiatives.
9Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
10All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011
Guido van Rossumfd71b9e2000-06-30 23:50:40 +000012See the file "Misc/COPYRIGHT" for information on usage and
13redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000014
15******************************************************************/
16
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000017/* Parser implementation */
18
19/* For a description, see the comments at end of this file */
20
21/* XXX To do: error recovery */
22
Guido van Rossum3f5da241990-12-20 15:06:42 +000023#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000024#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025#include "token.h"
26#include "grammar.h"
27#include "node.h"
28#include "parser.h"
29#include "errcode.h"
30
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031
Guido van Rossum408027e1996-12-30 16:17:54 +000032#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000033extern int Py_DebugFlag;
34#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035#else
36#define D(x)
37#endif
38
39
40/* STACK DATA TYPE */
41
Guido van Rossum86bea461997-04-29 21:03:06 +000042static void s_reset Py_PROTO((stack *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000043
44static void
45s_reset(s)
46 stack *s;
47{
48 s->s_top = &s->s_base[MAXSTACK];
49}
50
51#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
52
Guido van Rossum86bea461997-04-29 21:03:06 +000053static int s_push Py_PROTO((stack *, dfa *, node *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054
55static int
56s_push(s, d, parent)
57 register stack *s;
58 dfa *d;
59 node *parent;
60{
61 register stackentry *top;
62 if (s->s_top == s->s_base) {
63 fprintf(stderr, "s_push: parser stack overflow\n");
64 return -1;
65 }
66 top = --s->s_top;
67 top->s_dfa = d;
68 top->s_parent = parent;
69 top->s_state = 0;
70 return 0;
71}
72
Guido van Rossum408027e1996-12-30 16:17:54 +000073#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000074
Guido van Rossum86bea461997-04-29 21:03:06 +000075static void s_pop Py_PROTO((stack *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000076
77static void
78s_pop(s)
79 register stack *s;
80{
Guido van Rossum588633d1994-12-30 15:46:02 +000081 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000082 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000083 s->s_top++;
84}
85
Guido van Rossum408027e1996-12-30 16:17:54 +000086#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000087
88#define s_pop(s) (s)->s_top++
89
90#endif
91
92
93/* PARSER CREATION */
94
95parser_state *
Guido van Rossum86bea461997-04-29 21:03:06 +000096PyParser_New(g, start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097 grammar *g;
98 int start;
99{
100 parser_state *ps;
101
102 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +0000103 PyGrammar_AddAccelerators(g);
104 ps = PyMem_NEW(parser_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105 if (ps == NULL)
106 return NULL;
107 ps->p_grammar = g;
Guido van Rossum86bea461997-04-29 21:03:06 +0000108 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109 if (ps->p_tree == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000110 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111 return NULL;
112 }
113 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +0000114 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115 return ps;
116}
117
118void
Guido van Rossum86bea461997-04-29 21:03:06 +0000119PyParser_Delete(ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120 parser_state *ps;
121{
Guido van Rossum99f02d41990-11-18 17:38:42 +0000122 /* NB If you want to save the parse tree,
123 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +0000124 PyNode_Free(ps->p_tree);
125 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126}
127
128
129/* PARSER STACK OPERATIONS */
130
Guido van Rossum86bea461997-04-29 21:03:06 +0000131static int shift Py_PROTO((stack *, int, char *, int, int));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000132
133static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000134shift(s, type, str, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000135 register stack *s;
136 int type;
137 char *str;
138 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000139 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140{
Jeremy Hylton94988062000-06-20 19:10:44 +0000141 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000142 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000143 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno);
144 if (err)
145 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000146 s->s_top->s_state = newstate;
147 return 0;
148}
149
Guido van Rossum86bea461997-04-29 21:03:06 +0000150static int push Py_PROTO((stack *, int, dfa *, int, int));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151
152static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000153push(s, type, d, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000154 register stack *s;
155 int type;
156 dfa *d;
157 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000158 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000159{
Jeremy Hylton94988062000-06-20 19:10:44 +0000160 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000161 register node *n;
162 n = s->s_top->s_parent;
163 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000164 err = PyNode_AddChild(n, type, (char *)NULL, lineno);
165 if (err)
166 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000167 s->s_top->s_state = newstate;
168 return s_push(s, d, CHILD(n, NCH(n)-1));
169}
170
171
172/* PARSER PROPER */
173
Guido van Rossum86bea461997-04-29 21:03:06 +0000174static int classify Py_PROTO((grammar *, int, char *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000175
176static int
177classify(g, type, str)
178 grammar *g;
179 register int type;
180 char *str;
181{
182 register int n = g->g_ll.ll_nlabels;
183
184 if (type == NAME) {
185 register char *s = str;
186 register label *l = g->g_ll.ll_label;
187 register int i;
188 for (i = n; i > 0; i--, l++) {
189 if (l->lb_type == NAME && l->lb_str != NULL &&
190 l->lb_str[0] == s[0] &&
191 strcmp(l->lb_str, s) == 0) {
192 D(printf("It's a keyword\n"));
193 return n - i;
194 }
195 }
196 }
197
198 {
199 register label *l = g->g_ll.ll_label;
200 register int i;
201 for (i = n; i > 0; i--, l++) {
202 if (l->lb_type == type && l->lb_str == NULL) {
203 D(printf("It's a token we know\n"));
204 return n - i;
205 }
206 }
207 }
208
209 D(printf("Illegal token\n"));
210 return -1;
211}
212
213int
Guido van Rossum86bea461997-04-29 21:03:06 +0000214PyParser_AddToken(ps, type, str, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 register parser_state *ps;
216 register int type;
217 char *str;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000218 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000219{
220 register int ilabel;
Jeremy Hylton94988062000-06-20 19:10:44 +0000221 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000222
Guido van Rossum86bea461997-04-29 21:03:06 +0000223 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000224
225 /* Find out which label this token is */
226 ilabel = classify(ps->p_grammar, type, str);
227 if (ilabel < 0)
228 return E_SYNTAX;
229
230 /* Loop until the token is shifted or an error occurred */
231 for (;;) {
232 /* Fetch the current dfa and state */
233 register dfa *d = ps->p_stack.s_top->s_dfa;
234 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
235
236 D(printf(" DFA '%s', state %d:",
237 d->d_name, ps->p_stack.s_top->s_state));
238
239 /* Check accelerator */
240 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
241 register int x = s->s_accel[ilabel - s->s_lower];
242 if (x != -1) {
243 if (x & (1<<7)) {
244 /* Push non-terminal */
245 int nt = (x >> 8) + NT_OFFSET;
246 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000247 dfa *d1 = PyGrammar_FindDFA(
248 ps->p_grammar, nt);
Jeremy Hylton94988062000-06-20 19:10:44 +0000249 if ((err = push(&ps->p_stack, nt, d1,
250 arrow, lineno)) > 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000251 D(printf(" MemError: push\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000252 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000253 }
254 D(printf(" Push ...\n"));
255 continue;
256 }
257
258 /* Shift the token */
Jeremy Hylton94988062000-06-20 19:10:44 +0000259 if ((err = shift(&ps->p_stack, type, str,
260 x, lineno)) > 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000261 D(printf(" MemError: shift.\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000262 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000263 }
264 D(printf(" Shift.\n"));
265 /* Pop while we are in an accept-only state */
266 while (s = &d->d_state
267 [ps->p_stack.s_top->s_state],
268 s->s_accept && s->s_narcs == 1) {
269 D(printf(" Direct pop.\n"));
270 s_pop(&ps->p_stack);
271 if (s_empty(&ps->p_stack)) {
272 D(printf(" ACCEPT.\n"));
273 return E_DONE;
274 }
275 d = ps->p_stack.s_top->s_dfa;
276 }
277 return E_OK;
278 }
279 }
280
281 if (s->s_accept) {
282 /* Pop this dfa and try again */
283 s_pop(&ps->p_stack);
284 D(printf(" Pop ...\n"));
285 if (s_empty(&ps->p_stack)) {
286 D(printf(" Error: bottom of stack.\n"));
287 return E_SYNTAX;
288 }
289 continue;
290 }
291
292 /* Stuck, report syntax error */
293 D(printf(" Error.\n"));
294 return E_SYNTAX;
295 }
296}
297
298
Guido van Rossum408027e1996-12-30 16:17:54 +0000299#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300
301/* DEBUG OUTPUT */
302
303void
304dumptree(g, n)
305 grammar *g;
306 node *n;
307{
308 int i;
309
310 if (n == NULL)
311 printf("NIL");
312 else {
313 label l;
314 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000315 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000316 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000317 if (ISNONTERMINAL(TYPE(n))) {
318 printf("(");
319 for (i = 0; i < NCH(n); i++) {
320 if (i > 0)
321 printf(",");
322 dumptree(g, CHILD(n, i));
323 }
324 printf(")");
325 }
326 }
327}
328
329void
330showtree(g, n)
331 grammar *g;
332 node *n;
333{
334 int i;
335
336 if (n == NULL)
337 return;
338 if (ISNONTERMINAL(TYPE(n))) {
339 for (i = 0; i < NCH(n); i++)
340 showtree(g, CHILD(n, i));
341 }
342 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000343 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
345 printf("(%s)", STR(n));
346 printf(" ");
347 }
348 else
349 printf("? ");
350}
351
352void
353printtree(ps)
354 parser_state *ps;
355{
Guido van Rossum86bea461997-04-29 21:03:06 +0000356 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000357 printf("Parse tree:\n");
358 dumptree(ps->p_grammar, ps->p_tree);
359 printf("\n");
360 printf("Tokens:\n");
361 showtree(ps->p_grammar, ps->p_tree);
362 printf("\n");
363 }
364 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000365 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000366 printf("\n");
367}
368
Guido van Rossum408027e1996-12-30 16:17:54 +0000369#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000370
371/*
372
373Description
374-----------
375
376The parser's interface is different than usual: the function addtoken()
377must be called for each token in the input. This makes it possible to
378turn it into an incremental parsing system later. The parsing system
379constructs a parse tree as it goes.
380
381A parsing rule is represented as a Deterministic Finite-state Automaton
382(DFA). A node in a DFA represents a state of the parser; an arc represents
383a transition. Transitions are either labeled with terminal symbols or
384with non-terminals. When the parser decides to follow an arc labeled
385with a non-terminal, it is invoked recursively with the DFA representing
386the parsing rule for that as its initial state; when that DFA accepts,
387the parser that invoked it continues. The parse tree constructed by the
388recursively called parser is inserted as a child in the current parse tree.
389
390The DFA's can be constructed automatically from a more conventional
391language description. An extended LL(1) grammar (ELL(1)) is suitable.
392Certain restrictions make the parser's life easier: rules that can produce
393the empty string should be outlawed (there are other ways to put loops
394or optional parts in the language). To avoid the need to construct
395FIRST sets, we can require that all but the last alternative of a rule
396(really: arc going out of a DFA's state) must begin with a terminal
397symbol.
398
399As an example, consider this grammar:
400
401expr: term (OP term)*
402term: CONSTANT | '(' expr ')'
403
404The DFA corresponding to the rule for expr is:
405
406------->.---term-->.------->
407 ^ |
408 | |
409 \----OP----/
410
411The parse tree generated for the input a+b is:
412
413(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
414
415*/