blob: ada6be2b9c93dc829572463a64d2f249415de8d7 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Parser implementation */
3
4/* For a description, see the comments at end of this file */
5
6/* XXX To do: error recovery */
7
Tim Peters1ca12962001-12-04 03:18:48 +00008#include "Python.h"
Guido van Rossum3f5da241990-12-20 15:06:42 +00009#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "token.h"
11#include "grammar.h"
12#include "node.h"
13#include "parser.h"
14#include "errcode.h"
15
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000016
Guido van Rossum408027e1996-12-30 16:17:54 +000017#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000018extern int Py_DebugFlag;
19#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000020#else
21#define D(x)
22#endif
23
24
25/* STACK DATA TYPE */
26
Tim Petersdbd9ba62000-07-09 03:09:57 +000027static void s_reset(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
29static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000030s_reset(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031{
32 s->s_top = &s->s_base[MAXSTACK];
33}
34
35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037static int
Thomas Wouters23c9e002000-07-22 19:20:54 +000038s_push(register stack *s, dfa *d, node *parent)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039{
40 register stackentry *top;
41 if (s->s_top == s->s_base) {
42 fprintf(stderr, "s_push: parser stack overflow\n");
Guido van Rossume3c3b272000-10-02 10:21:59 +000043 return E_NOMEM;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000044 }
45 top = --s->s_top;
46 top->s_dfa = d;
47 top->s_parent = parent;
48 top->s_state = 0;
49 return 0;
50}
51
Guido van Rossum408027e1996-12-30 16:17:54 +000052#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000055s_pop(register stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Guido van Rossum588633d1994-12-30 15:46:02 +000057 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000058 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000059 s->s_top++;
60}
61
Guido van Rossum408027e1996-12-30 16:17:54 +000062#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063
64#define s_pop(s) (s)->s_top++
65
66#endif
67
68
69/* PARSER CREATION */
70
71parser_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +000072PyParser_New(grammar *g, int start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073{
74 parser_state *ps;
75
76 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +000077 PyGrammar_AddAccelerators(g);
78 ps = PyMem_NEW(parser_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000079 if (ps == NULL)
80 return NULL;
81 ps->p_grammar = g;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +000082#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
83 ps->p_flags = 0;
Neil Schemenauerc155dd42002-03-22 23:38:11 +000084#endif
Guido van Rossum86bea461997-04-29 21:03:06 +000085 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000086 if (ps->p_tree == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +000087 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088 return NULL;
89 }
90 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +000091 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000092 return ps;
93}
94
95void
Thomas Wouters23c9e002000-07-22 19:20:54 +000096PyParser_Delete(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097{
Guido van Rossum99f02d41990-11-18 17:38:42 +000098 /* NB If you want to save the parse tree,
99 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +0000100 PyNode_Free(ps->p_tree);
101 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
104
105/* PARSER STACK OPERATIONS */
106
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107static int
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000108shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109{
Jeremy Hylton94988062000-06-20 19:10:44 +0000110 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111 assert(!s_empty(s));
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000112 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
Jeremy Hylton94988062000-06-20 19:10:44 +0000113 if (err)
114 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115 s->s_top->s_state = newstate;
116 return 0;
117}
118
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000119static int
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000120push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121{
Jeremy Hylton94988062000-06-20 19:10:44 +0000122 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000123 register node *n;
124 n = s->s_top->s_parent;
125 assert(!s_empty(s));
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000126 err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
Jeremy Hylton94988062000-06-20 19:10:44 +0000127 if (err)
128 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000129 s->s_top->s_state = newstate;
130 return s_push(s, d, CHILD(n, NCH(n)-1));
131}
132
133
134/* PARSER PROPER */
135
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136static int
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000137classify(parser_state *ps, int type, char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000138{
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000139 grammar *g = ps->p_grammar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140 register int n = g->g_ll.ll_nlabels;
141
142 if (type == NAME) {
143 register char *s = str;
144 register label *l = g->g_ll.ll_label;
145 register int i;
146 for (i = n; i > 0; i--, l++) {
Thomas Wouters8ae12952006-02-28 22:42:15 +0000147 if (l->lb_type != NAME || l->lb_str == NULL ||
148 l->lb_str[0] != s[0] ||
149 strcmp(l->lb_str, s) != 0)
150 continue;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000151#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Thomas Wouters8ae12952006-02-28 22:42:15 +0000152 if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
153 if (s[0] == 'w' && strcmp(s, "with") == 0)
154 break; /* not a keyword yet */
155 else if (s[0] == 'a' && strcmp(s, "as") == 0)
156 break; /* not a keyword yet */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000157 }
Thomas Wouters8ae12952006-02-28 22:42:15 +0000158#endif
159 D(printf("It's a keyword\n"));
160 return n - i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000161 }
162 }
163
164 {
165 register label *l = g->g_ll.ll_label;
166 register int i;
167 for (i = n; i > 0; i--, l++) {
168 if (l->lb_type == type && l->lb_str == NULL) {
169 D(printf("It's a token we know\n"));
170 return n - i;
171 }
172 }
173 }
174
175 D(printf("Illegal token\n"));
176 return -1;
177}
178
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000179#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000180static void
181future_hack(parser_state *ps)
182{
183 node *n = ps->p_stack.s_top->s_parent;
184 node *ch;
Guido van Rossum3c033232001-07-19 15:27:45 +0000185 int i;
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000186
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000187 /* from __future__ import ..., must have at least 4 children */
188 n = CHILD(n, 0);
189 if (NCH(n) < 4)
190 return;
191 ch = CHILD(n, 0);
192 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000193 return;
194 ch = CHILD(n, 1);
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000195 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
196 strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000197 return;
Guido van Rossum3c033232001-07-19 15:27:45 +0000198 for (i = 3; i < NCH(n); i += 2) {
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000199 /* XXX: assume we don't have parentheses in import:
200 from __future__ import (x, y, z)
201 */
Guido van Rossum3c033232001-07-19 15:27:45 +0000202 ch = CHILD(n, i);
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000203 if (NCH(ch) == 1)
204 ch = CHILD(ch, 0);
Guido van Rossum3c033232001-07-19 15:27:45 +0000205 if (NCH(ch) >= 1 && TYPE(CHILD(ch, 0)) == NAME &&
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000206 strcmp(STR(CHILD(ch, 0)), "with_statement") == 0) {
207 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
Guido van Rossum3c033232001-07-19 15:27:45 +0000208 break;
209 }
210 }
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000211}
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000212#endif /* future keyword */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000213
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000214int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000215PyParser_AddToken(register parser_state *ps, register int type, char *str,
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000216 int lineno, int col_offset, int *expected_ret)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000217{
218 register int ilabel;
Jeremy Hylton94988062000-06-20 19:10:44 +0000219 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000220
Guido van Rossum86bea461997-04-29 21:03:06 +0000221 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000222
223 /* Find out which label this token is */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000224 ilabel = classify(ps, type, str);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000225 if (ilabel < 0)
226 return E_SYNTAX;
227
228 /* Loop until the token is shifted or an error occurred */
229 for (;;) {
230 /* Fetch the current dfa and state */
231 register dfa *d = ps->p_stack.s_top->s_dfa;
232 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
233
234 D(printf(" DFA '%s', state %d:",
235 d->d_name, ps->p_stack.s_top->s_state));
236
237 /* Check accelerator */
238 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
239 register int x = s->s_accel[ilabel - s->s_lower];
240 if (x != -1) {
241 if (x & (1<<7)) {
242 /* Push non-terminal */
243 int nt = (x >> 8) + NT_OFFSET;
244 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000245 dfa *d1 = PyGrammar_FindDFA(
246 ps->p_grammar, nt);
Jeremy Hylton94988062000-06-20 19:10:44 +0000247 if ((err = push(&ps->p_stack, nt, d1,
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000248 arrow, lineno, col_offset)) > 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000249 D(printf(" MemError: push\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000250 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000251 }
252 D(printf(" Push ...\n"));
253 continue;
254 }
255
256 /* Shift the token */
Jeremy Hylton94988062000-06-20 19:10:44 +0000257 if ((err = shift(&ps->p_stack, type, str,
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000258 x, lineno, col_offset)) > 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 D(printf(" MemError: shift.\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000260 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000261 }
262 D(printf(" Shift.\n"));
263 /* Pop while we are in an accept-only state */
264 while (s = &d->d_state
265 [ps->p_stack.s_top->s_state],
266 s->s_accept && s->s_narcs == 1) {
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000267 D(printf(" DFA '%s', state %d: "
268 "Direct pop.\n",
269 d->d_name,
270 ps->p_stack.s_top->s_state));
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000271#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000272 if (d->d_name[0] == 'i' &&
273 strcmp(d->d_name,
274 "import_stmt") == 0)
275 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000276#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277 s_pop(&ps->p_stack);
278 if (s_empty(&ps->p_stack)) {
279 D(printf(" ACCEPT.\n"));
280 return E_DONE;
281 }
282 d = ps->p_stack.s_top->s_dfa;
283 }
284 return E_OK;
285 }
286 }
287
288 if (s->s_accept) {
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000289#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000290 if (d->d_name[0] == 'i' &&
291 strcmp(d->d_name, "import_stmt") == 0)
292 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000293#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000294 /* Pop this dfa and try again */
295 s_pop(&ps->p_stack);
296 D(printf(" Pop ...\n"));
297 if (s_empty(&ps->p_stack)) {
298 D(printf(" Error: bottom of stack.\n"));
299 return E_SYNTAX;
300 }
301 continue;
302 }
303
304 /* Stuck, report syntax error */
305 D(printf(" Error.\n"));
Fred Drake85f36392000-07-11 17:53:00 +0000306 if (expected_ret) {
307 if (s->s_lower == s->s_upper - 1) {
308 /* Only one possible expected token */
309 *expected_ret = ps->p_grammar->
310 g_ll.ll_label[s->s_lower].lb_type;
311 }
312 else
313 *expected_ret = -1;
314 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000315 return E_SYNTAX;
316 }
317}
318
319
Guido van Rossum408027e1996-12-30 16:17:54 +0000320#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000321
322/* DEBUG OUTPUT */
323
324void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000325dumptree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326{
327 int i;
328
329 if (n == NULL)
330 printf("NIL");
331 else {
332 label l;
333 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000334 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000335 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000336 if (ISNONTERMINAL(TYPE(n))) {
337 printf("(");
338 for (i = 0; i < NCH(n); i++) {
339 if (i > 0)
340 printf(",");
341 dumptree(g, CHILD(n, i));
342 }
343 printf(")");
344 }
345 }
346}
347
348void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000349showtree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000350{
351 int i;
352
353 if (n == NULL)
354 return;
355 if (ISNONTERMINAL(TYPE(n))) {
356 for (i = 0; i < NCH(n); i++)
357 showtree(g, CHILD(n, i));
358 }
359 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000360 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000361 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
362 printf("(%s)", STR(n));
363 printf(" ");
364 }
365 else
366 printf("? ");
367}
368
369void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000370printtree(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000371{
Guido van Rossum86bea461997-04-29 21:03:06 +0000372 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000373 printf("Parse tree:\n");
374 dumptree(ps->p_grammar, ps->p_tree);
375 printf("\n");
376 printf("Tokens:\n");
377 showtree(ps->p_grammar, ps->p_tree);
378 printf("\n");
379 }
380 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000381 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000382 printf("\n");
383}
384
Guido van Rossum408027e1996-12-30 16:17:54 +0000385#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000386
387/*
388
389Description
390-----------
391
392The parser's interface is different than usual: the function addtoken()
393must be called for each token in the input. This makes it possible to
394turn it into an incremental parsing system later. The parsing system
395constructs a parse tree as it goes.
396
397A parsing rule is represented as a Deterministic Finite-state Automaton
398(DFA). A node in a DFA represents a state of the parser; an arc represents
399a transition. Transitions are either labeled with terminal symbols or
400with non-terminals. When the parser decides to follow an arc labeled
401with a non-terminal, it is invoked recursively with the DFA representing
402the parsing rule for that as its initial state; when that DFA accepts,
403the parser that invoked it continues. The parse tree constructed by the
404recursively called parser is inserted as a child in the current parse tree.
405
406The DFA's can be constructed automatically from a more conventional
407language description. An extended LL(1) grammar (ELL(1)) is suitable.
408Certain restrictions make the parser's life easier: rules that can produce
409the empty string should be outlawed (there are other ways to put loops
410or optional parts in the language). To avoid the need to construct
411FIRST sets, we can require that all but the last alternative of a rule
412(really: arc going out of a DFA's state) must begin with a terminal
413symbol.
414
415As an example, consider this grammar:
416
417expr: term (OP term)*
418term: CONSTANT | '(' expr ')'
419
420The DFA corresponding to the rule for expr is:
421
422------->.---term-->.------->
423 ^ |
424 | |
425 \----OP----/
426
427The parse tree generated for the input a+b is:
428
429(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
430
431*/