blob: a9125e29a99d7a0de780c372866a7bf96cd034e0 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Parser implementation */
3
4/* For a description, see the comments at end of this file */
5
6/* XXX To do: error recovery */
7
Guido van Rossum3f5da241990-12-20 15:06:42 +00008#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "token.h"
11#include "grammar.h"
12#include "node.h"
13#include "parser.h"
14#include "errcode.h"
15
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000016
Guido van Rossum408027e1996-12-30 16:17:54 +000017#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000018extern int Py_DebugFlag;
19#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000020#else
21#define D(x)
22#endif
23
24
25/* STACK DATA TYPE */
26
Tim Petersdbd9ba62000-07-09 03:09:57 +000027static void s_reset(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
29static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000030s_reset(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031{
32 s->s_top = &s->s_base[MAXSTACK];
33}
34
35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037static int
Thomas Wouters23c9e002000-07-22 19:20:54 +000038s_push(register stack *s, dfa *d, node *parent)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039{
40 register stackentry *top;
41 if (s->s_top == s->s_base) {
42 fprintf(stderr, "s_push: parser stack overflow\n");
Guido van Rossume3c3b272000-10-02 10:21:59 +000043 return E_NOMEM;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000044 }
45 top = --s->s_top;
46 top->s_dfa = d;
47 top->s_parent = parent;
48 top->s_state = 0;
49 return 0;
50}
51
Guido van Rossum408027e1996-12-30 16:17:54 +000052#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000055s_pop(register stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Guido van Rossum588633d1994-12-30 15:46:02 +000057 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000058 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000059 s->s_top++;
60}
61
Guido van Rossum408027e1996-12-30 16:17:54 +000062#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063
64#define s_pop(s) (s)->s_top++
65
66#endif
67
68
69/* PARSER CREATION */
70
71parser_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +000072PyParser_New(grammar *g, int start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073{
74 parser_state *ps;
75
76 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +000077 PyGrammar_AddAccelerators(g);
78 ps = PyMem_NEW(parser_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000079 if (ps == NULL)
80 return NULL;
81 ps->p_grammar = g;
Guido van Rossumb09f7ed2001-07-15 21:08:29 +000082 ps->p_generators = 0;
Guido van Rossum86bea461997-04-29 21:03:06 +000083 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000084 if (ps->p_tree == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +000085 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000086 return NULL;
87 }
88 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +000089 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090 return ps;
91}
92
93void
Thomas Wouters23c9e002000-07-22 19:20:54 +000094PyParser_Delete(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Guido van Rossum99f02d41990-11-18 17:38:42 +000096 /* NB If you want to save the parse tree,
97 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +000098 PyNode_Free(ps->p_tree);
99 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
102
103/* PARSER STACK OPERATIONS */
104
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000106shift(register stack *s, int type, char *str, int newstate, int lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Jeremy Hylton94988062000-06-20 19:10:44 +0000108 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000110 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno);
111 if (err)
112 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000113 s->s_top->s_state = newstate;
114 return 0;
115}
116
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000117static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000118push(register stack *s, int type, dfa *d, int newstate, int lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000119{
Jeremy Hylton94988062000-06-20 19:10:44 +0000120 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121 register node *n;
122 n = s->s_top->s_parent;
123 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000124 err = PyNode_AddChild(n, type, (char *)NULL, lineno);
125 if (err)
126 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000127 s->s_top->s_state = newstate;
128 return s_push(s, d, CHILD(n, NCH(n)-1));
129}
130
131
132/* PARSER PROPER */
133
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000134static int
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000135classify(parser_state *ps, int type, char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136{
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000137 grammar *g = ps->p_grammar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000138 register int n = g->g_ll.ll_nlabels;
139
140 if (type == NAME) {
141 register char *s = str;
142 register label *l = g->g_ll.ll_label;
143 register int i;
144 for (i = n; i > 0; i--, l++) {
145 if (l->lb_type == NAME && l->lb_str != NULL &&
146 l->lb_str[0] == s[0] &&
147 strcmp(l->lb_str, s) == 0) {
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000148 if (!ps->p_generators &&
149 s[0] == 'y' &&
150 strcmp(s, "yield") == 0)
151 break; /* not a keyword */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152 D(printf("It's a keyword\n"));
153 return n - i;
154 }
155 }
156 }
157
158 {
159 register label *l = g->g_ll.ll_label;
160 register int i;
161 for (i = n; i > 0; i--, l++) {
162 if (l->lb_type == type && l->lb_str == NULL) {
163 D(printf("It's a token we know\n"));
164 return n - i;
165 }
166 }
167 }
168
169 D(printf("Illegal token\n"));
170 return -1;
171}
172
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000173static void
174future_hack(parser_state *ps)
175{
176 node *n = ps->p_stack.s_top->s_parent;
177 node *ch;
Guido van Rossum3c033232001-07-19 15:27:45 +0000178 int i;
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000179
180 if (strcmp(STR(CHILD(n, 0)), "from") != 0)
181 return;
182 ch = CHILD(n, 1);
183 if (strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
184 return;
Guido van Rossum3c033232001-07-19 15:27:45 +0000185 for (i = 3; i < NCH(n); i += 2) {
186 ch = CHILD(n, i);
187 if (NCH(ch) >= 1 && TYPE(CHILD(ch, 0)) == NAME &&
188 strcmp(STR(CHILD(ch, 0)), "generators") == 0) {
189 ps->p_generators = 1;
190 break;
191 }
192 }
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000193}
194
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000196PyParser_AddToken(register parser_state *ps, register int type, char *str,
197 int lineno, int *expected_ret)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000198{
199 register int ilabel;
Jeremy Hylton94988062000-06-20 19:10:44 +0000200 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000201
Guido van Rossum86bea461997-04-29 21:03:06 +0000202 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000203
204 /* Find out which label this token is */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000205 ilabel = classify(ps, type, str);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000206 if (ilabel < 0)
207 return E_SYNTAX;
208
209 /* Loop until the token is shifted or an error occurred */
210 for (;;) {
211 /* Fetch the current dfa and state */
212 register dfa *d = ps->p_stack.s_top->s_dfa;
213 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
214
215 D(printf(" DFA '%s', state %d:",
216 d->d_name, ps->p_stack.s_top->s_state));
217
218 /* Check accelerator */
219 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
220 register int x = s->s_accel[ilabel - s->s_lower];
221 if (x != -1) {
222 if (x & (1<<7)) {
223 /* Push non-terminal */
224 int nt = (x >> 8) + NT_OFFSET;
225 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000226 dfa *d1 = PyGrammar_FindDFA(
227 ps->p_grammar, nt);
Jeremy Hylton94988062000-06-20 19:10:44 +0000228 if ((err = push(&ps->p_stack, nt, d1,
229 arrow, lineno)) > 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000230 D(printf(" MemError: push\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000231 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000232 }
233 D(printf(" Push ...\n"));
234 continue;
235 }
236
237 /* Shift the token */
Jeremy Hylton94988062000-06-20 19:10:44 +0000238 if ((err = shift(&ps->p_stack, type, str,
239 x, lineno)) > 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000240 D(printf(" MemError: shift.\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000241 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000242 }
243 D(printf(" Shift.\n"));
244 /* Pop while we are in an accept-only state */
245 while (s = &d->d_state
246 [ps->p_stack.s_top->s_state],
247 s->s_accept && s->s_narcs == 1) {
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000248 D(printf(" DFA '%s', state %d: "
249 "Direct pop.\n",
250 d->d_name,
251 ps->p_stack.s_top->s_state));
252 if (d->d_name[0] == 'i' &&
253 strcmp(d->d_name,
254 "import_stmt") == 0)
255 future_hack(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256 s_pop(&ps->p_stack);
257 if (s_empty(&ps->p_stack)) {
258 D(printf(" ACCEPT.\n"));
259 return E_DONE;
260 }
261 d = ps->p_stack.s_top->s_dfa;
262 }
263 return E_OK;
264 }
265 }
266
267 if (s->s_accept) {
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000268 if (d->d_name[0] == 'i' &&
269 strcmp(d->d_name, "import_stmt") == 0)
270 future_hack(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000271 /* Pop this dfa and try again */
272 s_pop(&ps->p_stack);
273 D(printf(" Pop ...\n"));
274 if (s_empty(&ps->p_stack)) {
275 D(printf(" Error: bottom of stack.\n"));
276 return E_SYNTAX;
277 }
278 continue;
279 }
280
281 /* Stuck, report syntax error */
282 D(printf(" Error.\n"));
Fred Drake85f36392000-07-11 17:53:00 +0000283 if (expected_ret) {
284 if (s->s_lower == s->s_upper - 1) {
285 /* Only one possible expected token */
286 *expected_ret = ps->p_grammar->
287 g_ll.ll_label[s->s_lower].lb_type;
288 }
289 else
290 *expected_ret = -1;
291 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000292 return E_SYNTAX;
293 }
294}
295
296
Guido van Rossum408027e1996-12-30 16:17:54 +0000297#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298
299/* DEBUG OUTPUT */
300
301void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000302dumptree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303{
304 int i;
305
306 if (n == NULL)
307 printf("NIL");
308 else {
309 label l;
310 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000311 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000312 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000313 if (ISNONTERMINAL(TYPE(n))) {
314 printf("(");
315 for (i = 0; i < NCH(n); i++) {
316 if (i > 0)
317 printf(",");
318 dumptree(g, CHILD(n, i));
319 }
320 printf(")");
321 }
322 }
323}
324
325void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000326showtree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000327{
328 int i;
329
330 if (n == NULL)
331 return;
332 if (ISNONTERMINAL(TYPE(n))) {
333 for (i = 0; i < NCH(n); i++)
334 showtree(g, CHILD(n, i));
335 }
336 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000337 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
339 printf("(%s)", STR(n));
340 printf(" ");
341 }
342 else
343 printf("? ");
344}
345
346void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000347printtree(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348{
Guido van Rossum86bea461997-04-29 21:03:06 +0000349 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000350 printf("Parse tree:\n");
351 dumptree(ps->p_grammar, ps->p_tree);
352 printf("\n");
353 printf("Tokens:\n");
354 showtree(ps->p_grammar, ps->p_tree);
355 printf("\n");
356 }
357 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000358 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359 printf("\n");
360}
361
Guido van Rossum408027e1996-12-30 16:17:54 +0000362#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363
364/*
365
366Description
367-----------
368
369The parser's interface is different than usual: the function addtoken()
370must be called for each token in the input. This makes it possible to
371turn it into an incremental parsing system later. The parsing system
372constructs a parse tree as it goes.
373
374A parsing rule is represented as a Deterministic Finite-state Automaton
375(DFA). A node in a DFA represents a state of the parser; an arc represents
376a transition. Transitions are either labeled with terminal symbols or
377with non-terminals. When the parser decides to follow an arc labeled
378with a non-terminal, it is invoked recursively with the DFA representing
379the parsing rule for that as its initial state; when that DFA accepts,
380the parser that invoked it continues. The parse tree constructed by the
381recursively called parser is inserted as a child in the current parse tree.
382
383The DFA's can be constructed automatically from a more conventional
384language description. An extended LL(1) grammar (ELL(1)) is suitable.
385Certain restrictions make the parser's life easier: rules that can produce
386the empty string should be outlawed (there are other ways to put loops
387or optional parts in the language). To avoid the need to construct
388FIRST sets, we can require that all but the last alternative of a rule
389(really: arc going out of a DFA's state) must begin with a terminal
390symbol.
391
392As an example, consider this grammar:
393
394expr: term (OP term)*
395term: CONSTANT | '(' expr ')'
396
397The DFA corresponding to the rule for expr is:
398
399------->.---term-->.------->
400 ^ |
401 | |
402 \----OP----/
403
404The parse tree generated for the input a+b is:
405
406(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
407
408*/