blob: 2ce84cd32b828e270e67e0ca9b1d25b283386cb3 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Parser implementation */
3
4/* For a description, see the comments at end of this file */
5
6/* XXX To do: error recovery */
7
Tim Peters1ca12962001-12-04 03:18:48 +00008#include "Python.h"
Guido van Rossum3f5da241990-12-20 15:06:42 +00009#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "token.h"
11#include "grammar.h"
12#include "node.h"
13#include "parser.h"
14#include "errcode.h"
15
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000016
Guido van Rossum408027e1996-12-30 16:17:54 +000017#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000018extern int Py_DebugFlag;
19#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000020#else
21#define D(x)
22#endif
23
24
25/* STACK DATA TYPE */
26
Tim Petersdbd9ba62000-07-09 03:09:57 +000027static void s_reset(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
29static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000030s_reset(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031{
32 s->s_top = &s->s_base[MAXSTACK];
33}
34
35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037static int
Thomas Wouters23c9e002000-07-22 19:20:54 +000038s_push(register stack *s, dfa *d, node *parent)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039{
40 register stackentry *top;
41 if (s->s_top == s->s_base) {
42 fprintf(stderr, "s_push: parser stack overflow\n");
Guido van Rossume3c3b272000-10-02 10:21:59 +000043 return E_NOMEM;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000044 }
45 top = --s->s_top;
46 top->s_dfa = d;
47 top->s_parent = parent;
48 top->s_state = 0;
49 return 0;
50}
51
Guido van Rossum408027e1996-12-30 16:17:54 +000052#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000055s_pop(register stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Guido van Rossum588633d1994-12-30 15:46:02 +000057 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000058 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000059 s->s_top++;
60}
61
Guido van Rossum408027e1996-12-30 16:17:54 +000062#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063
64#define s_pop(s) (s)->s_top++
65
66#endif
67
68
69/* PARSER CREATION */
70
71parser_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +000072PyParser_New(grammar *g, int start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073{
74 parser_state *ps;
75
76 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +000077 PyGrammar_AddAccelerators(g);
Anthony Baxter11490022006-04-11 05:39:14 +000078 ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000079 if (ps == NULL)
80 return NULL;
81 ps->p_grammar = g;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +000082#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
83 ps->p_flags = 0;
Neil Schemenauerc155dd42002-03-22 23:38:11 +000084#endif
Guido van Rossum86bea461997-04-29 21:03:06 +000085 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000086 if (ps->p_tree == NULL) {
Neal Norwitz2c4e4f92006-04-10 06:42:25 +000087 PyMem_FREE(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088 return NULL;
89 }
90 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +000091 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000092 return ps;
93}
94
95void
Thomas Wouters23c9e002000-07-22 19:20:54 +000096PyParser_Delete(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097{
Guido van Rossum99f02d41990-11-18 17:38:42 +000098 /* NB If you want to save the parse tree,
99 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +0000100 PyNode_Free(ps->p_tree);
Neal Norwitz2c4e4f92006-04-10 06:42:25 +0000101 PyMem_FREE(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
104
105/* PARSER STACK OPERATIONS */
106
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107static int
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000108shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109{
Jeremy Hylton94988062000-06-20 19:10:44 +0000110 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111 assert(!s_empty(s));
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000112 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
Jeremy Hylton94988062000-06-20 19:10:44 +0000113 if (err)
114 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115 s->s_top->s_state = newstate;
116 return 0;
117}
118
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000119static int
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000120push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121{
Jeremy Hylton94988062000-06-20 19:10:44 +0000122 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000123 register node *n;
124 n = s->s_top->s_parent;
125 assert(!s_empty(s));
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000126 err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
Jeremy Hylton94988062000-06-20 19:10:44 +0000127 if (err)
128 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000129 s->s_top->s_state = newstate;
130 return s_push(s, d, CHILD(n, NCH(n)-1));
131}
132
133
134/* PARSER PROPER */
135
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136static int
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000137classify(parser_state *ps, int type, char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000138{
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000139 grammar *g = ps->p_grammar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140 register int n = g->g_ll.ll_nlabels;
141
142 if (type == NAME) {
143 register char *s = str;
144 register label *l = g->g_ll.ll_label;
145 register int i;
146 for (i = n; i > 0; i--, l++) {
Thomas Wouters8ae12952006-02-28 22:42:15 +0000147 if (l->lb_type != NAME || l->lb_str == NULL ||
148 l->lb_str[0] != s[0] ||
149 strcmp(l->lb_str, s) != 0)
150 continue;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000151#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Thomas Wouters8ae12952006-02-28 22:42:15 +0000152 if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
153 if (s[0] == 'w' && strcmp(s, "with") == 0)
154 break; /* not a keyword yet */
155 else if (s[0] == 'a' && strcmp(s, "as") == 0)
156 break; /* not a keyword yet */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000157 }
Thomas Wouters8ae12952006-02-28 22:42:15 +0000158#endif
159 D(printf("It's a keyword\n"));
160 return n - i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000161 }
162 }
163
164 {
165 register label *l = g->g_ll.ll_label;
166 register int i;
167 for (i = n; i > 0; i--, l++) {
168 if (l->lb_type == type && l->lb_str == NULL) {
169 D(printf("It's a token we know\n"));
170 return n - i;
171 }
172 }
173 }
174
175 D(printf("Illegal token\n"));
176 return -1;
177}
178
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000179#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000180static void
181future_hack(parser_state *ps)
182{
183 node *n = ps->p_stack.s_top->s_parent;
Georg Brandla10d3af2006-09-24 12:35:36 +0000184 node *ch, *cch;
Guido van Rossum3c033232001-07-19 15:27:45 +0000185 int i;
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000186
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000187 /* from __future__ import ..., must have at least 4 children */
188 n = CHILD(n, 0);
189 if (NCH(n) < 4)
190 return;
191 ch = CHILD(n, 0);
192 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000193 return;
194 ch = CHILD(n, 1);
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000195 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
196 strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000197 return;
Georg Brandla10d3af2006-09-24 12:35:36 +0000198 ch = CHILD(n, 3);
199 /* ch can be a star, a parenthesis or import_as_names */
200 if (TYPE(ch) == STAR)
201 return;
202 if (TYPE(ch) == LPAR)
203 ch = CHILD(n, 4);
204
205 for (i = 0; i < NCH(ch); i += 2) {
206 cch = CHILD(ch, i);
207 if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME &&
208 strcmp(STR(CHILD(cch, 0)), "with_statement") == 0) {
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000209 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
Guido van Rossum3c033232001-07-19 15:27:45 +0000210 break;
211 }
212 }
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000213}
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000214#endif /* future keyword */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000215
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000216int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000217PyParser_AddToken(register parser_state *ps, register int type, char *str,
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000218 int lineno, int col_offset, int *expected_ret)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000219{
220 register int ilabel;
Jeremy Hylton94988062000-06-20 19:10:44 +0000221 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000222
Guido van Rossum86bea461997-04-29 21:03:06 +0000223 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000224
225 /* Find out which label this token is */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000226 ilabel = classify(ps, type, str);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000227 if (ilabel < 0)
228 return E_SYNTAX;
229
230 /* Loop until the token is shifted or an error occurred */
231 for (;;) {
232 /* Fetch the current dfa and state */
233 register dfa *d = ps->p_stack.s_top->s_dfa;
234 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
235
236 D(printf(" DFA '%s', state %d:",
237 d->d_name, ps->p_stack.s_top->s_state));
238
239 /* Check accelerator */
240 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
241 register int x = s->s_accel[ilabel - s->s_lower];
242 if (x != -1) {
243 if (x & (1<<7)) {
244 /* Push non-terminal */
245 int nt = (x >> 8) + NT_OFFSET;
246 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000247 dfa *d1 = PyGrammar_FindDFA(
248 ps->p_grammar, nt);
Jeremy Hylton94988062000-06-20 19:10:44 +0000249 if ((err = push(&ps->p_stack, nt, d1,
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000250 arrow, lineno, col_offset)) > 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000251 D(printf(" MemError: push\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000252 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000253 }
254 D(printf(" Push ...\n"));
255 continue;
256 }
257
258 /* Shift the token */
Jeremy Hylton94988062000-06-20 19:10:44 +0000259 if ((err = shift(&ps->p_stack, type, str,
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000260 x, lineno, col_offset)) > 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000261 D(printf(" MemError: shift.\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000262 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000263 }
264 D(printf(" Shift.\n"));
265 /* Pop while we are in an accept-only state */
266 while (s = &d->d_state
267 [ps->p_stack.s_top->s_state],
268 s->s_accept && s->s_narcs == 1) {
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000269 D(printf(" DFA '%s', state %d: "
270 "Direct pop.\n",
271 d->d_name,
272 ps->p_stack.s_top->s_state));
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000273#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000274 if (d->d_name[0] == 'i' &&
275 strcmp(d->d_name,
276 "import_stmt") == 0)
277 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000278#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000279 s_pop(&ps->p_stack);
280 if (s_empty(&ps->p_stack)) {
281 D(printf(" ACCEPT.\n"));
282 return E_DONE;
283 }
284 d = ps->p_stack.s_top->s_dfa;
285 }
286 return E_OK;
287 }
288 }
289
290 if (s->s_accept) {
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000291#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000292 if (d->d_name[0] == 'i' &&
293 strcmp(d->d_name, "import_stmt") == 0)
294 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000295#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000296 /* Pop this dfa and try again */
297 s_pop(&ps->p_stack);
298 D(printf(" Pop ...\n"));
299 if (s_empty(&ps->p_stack)) {
300 D(printf(" Error: bottom of stack.\n"));
301 return E_SYNTAX;
302 }
303 continue;
304 }
305
306 /* Stuck, report syntax error */
307 D(printf(" Error.\n"));
Fred Drake85f36392000-07-11 17:53:00 +0000308 if (expected_ret) {
309 if (s->s_lower == s->s_upper - 1) {
310 /* Only one possible expected token */
311 *expected_ret = ps->p_grammar->
312 g_ll.ll_label[s->s_lower].lb_type;
313 }
314 else
315 *expected_ret = -1;
316 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000317 return E_SYNTAX;
318 }
319}
320
321
Guido van Rossum408027e1996-12-30 16:17:54 +0000322#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000323
324/* DEBUG OUTPUT */
325
326void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000327dumptree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000328{
329 int i;
330
331 if (n == NULL)
332 printf("NIL");
333 else {
334 label l;
335 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000336 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000337 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 if (ISNONTERMINAL(TYPE(n))) {
339 printf("(");
340 for (i = 0; i < NCH(n); i++) {
341 if (i > 0)
342 printf(",");
343 dumptree(g, CHILD(n, i));
344 }
345 printf(")");
346 }
347 }
348}
349
350void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000351showtree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000352{
353 int i;
354
355 if (n == NULL)
356 return;
357 if (ISNONTERMINAL(TYPE(n))) {
358 for (i = 0; i < NCH(n); i++)
359 showtree(g, CHILD(n, i));
360 }
361 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000362 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
364 printf("(%s)", STR(n));
365 printf(" ");
366 }
367 else
368 printf("? ");
369}
370
371void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000372printtree(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000373{
Guido van Rossum86bea461997-04-29 21:03:06 +0000374 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000375 printf("Parse tree:\n");
376 dumptree(ps->p_grammar, ps->p_tree);
377 printf("\n");
378 printf("Tokens:\n");
379 showtree(ps->p_grammar, ps->p_tree);
380 printf("\n");
381 }
382 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000383 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000384 printf("\n");
385}
386
Guido van Rossum408027e1996-12-30 16:17:54 +0000387#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000388
389/*
390
391Description
392-----------
393
394The parser's interface is different than usual: the function addtoken()
395must be called for each token in the input. This makes it possible to
396turn it into an incremental parsing system later. The parsing system
397constructs a parse tree as it goes.
398
399A parsing rule is represented as a Deterministic Finite-state Automaton
400(DFA). A node in a DFA represents a state of the parser; an arc represents
401a transition. Transitions are either labeled with terminal symbols or
402with non-terminals. When the parser decides to follow an arc labeled
403with a non-terminal, it is invoked recursively with the DFA representing
404the parsing rule for that as its initial state; when that DFA accepts,
405the parser that invoked it continues. The parse tree constructed by the
406recursively called parser is inserted as a child in the current parse tree.
407
408The DFA's can be constructed automatically from a more conventional
409language description. An extended LL(1) grammar (ELL(1)) is suitable.
410Certain restrictions make the parser's life easier: rules that can produce
411the empty string should be outlawed (there are other ways to put loops
412or optional parts in the language). To avoid the need to construct
413FIRST sets, we can require that all but the last alternative of a rule
414(really: arc going out of a DFA's state) must begin with a terminal
415symbol.
416
417As an example, consider this grammar:
418
419expr: term (OP term)*
420term: CONSTANT | '(' expr ')'
421
422The DFA corresponding to the rule for expr is:
423
424------->.---term-->.------->
425 ^ |
426 | |
427 \----OP----/
428
429The parse tree generated for the input a+b is:
430
431(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
432
433*/