blob: 8d52153c038e95dbd2df45ea9ed12e86f10065e6 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Parser implementation */
3
4/* For a description, see the comments at end of this file */
5
6/* XXX To do: error recovery */
7
Tim Peters1ca12962001-12-04 03:18:48 +00008#include "Python.h"
Guido van Rossum3f5da241990-12-20 15:06:42 +00009#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "token.h"
11#include "grammar.h"
12#include "node.h"
13#include "parser.h"
14#include "errcode.h"
15
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000016
Guido van Rossum408027e1996-12-30 16:17:54 +000017#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000018extern int Py_DebugFlag;
19#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000020#else
21#define D(x)
22#endif
23
24
25/* STACK DATA TYPE */
26
Tim Petersdbd9ba62000-07-09 03:09:57 +000027static void s_reset(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
29static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000030s_reset(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031{
32 s->s_top = &s->s_base[MAXSTACK];
33}
34
35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037static int
Thomas Wouters23c9e002000-07-22 19:20:54 +000038s_push(register stack *s, dfa *d, node *parent)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039{
40 register stackentry *top;
41 if (s->s_top == s->s_base) {
42 fprintf(stderr, "s_push: parser stack overflow\n");
Guido van Rossume3c3b272000-10-02 10:21:59 +000043 return E_NOMEM;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000044 }
45 top = --s->s_top;
46 top->s_dfa = d;
47 top->s_parent = parent;
48 top->s_state = 0;
49 return 0;
50}
51
Guido van Rossum408027e1996-12-30 16:17:54 +000052#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000055s_pop(register stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Guido van Rossum588633d1994-12-30 15:46:02 +000057 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000058 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000059 s->s_top++;
60}
61
Guido van Rossum408027e1996-12-30 16:17:54 +000062#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063
64#define s_pop(s) (s)->s_top++
65
66#endif
67
68
69/* PARSER CREATION */
70
71parser_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +000072PyParser_New(grammar *g, int start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073{
74 parser_state *ps;
75
76 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +000077 PyGrammar_AddAccelerators(g);
Anthony Baxter11490022006-04-11 05:39:14 +000078 ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000079 if (ps == NULL)
80 return NULL;
81 ps->p_grammar = g;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +000082#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
83 ps->p_flags = 0;
Neil Schemenauerc155dd42002-03-22 23:38:11 +000084#endif
Guido van Rossum86bea461997-04-29 21:03:06 +000085 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000086 if (ps->p_tree == NULL) {
Neal Norwitz2c4e4f92006-04-10 06:42:25 +000087 PyMem_FREE(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088 return NULL;
89 }
90 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +000091 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000092 return ps;
93}
94
95void
Thomas Wouters23c9e002000-07-22 19:20:54 +000096PyParser_Delete(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097{
Guido van Rossum99f02d41990-11-18 17:38:42 +000098 /* NB If you want to save the parse tree,
99 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +0000100 PyNode_Free(ps->p_tree);
Neal Norwitz2c4e4f92006-04-10 06:42:25 +0000101 PyMem_FREE(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
104
105/* PARSER STACK OPERATIONS */
106
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107static int
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000108shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109{
Jeremy Hylton94988062000-06-20 19:10:44 +0000110 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111 assert(!s_empty(s));
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000112 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
Jeremy Hylton94988062000-06-20 19:10:44 +0000113 if (err)
114 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115 s->s_top->s_state = newstate;
116 return 0;
117}
118
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000119static int
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000120push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121{
Jeremy Hylton94988062000-06-20 19:10:44 +0000122 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000123 register node *n;
124 n = s->s_top->s_parent;
125 assert(!s_empty(s));
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000126 err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
Jeremy Hylton94988062000-06-20 19:10:44 +0000127 if (err)
128 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000129 s->s_top->s_state = newstate;
130 return s_push(s, d, CHILD(n, NCH(n)-1));
131}
132
133
134/* PARSER PROPER */
135
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136static int
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000137classify(parser_state *ps, int type, char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000138{
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000139 grammar *g = ps->p_grammar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140 register int n = g->g_ll.ll_nlabels;
141
142 if (type == NAME) {
143 register char *s = str;
144 register label *l = g->g_ll.ll_label;
145 register int i;
146 for (i = n; i > 0; i--, l++) {
Thomas Wouters8ae12952006-02-28 22:42:15 +0000147 if (l->lb_type != NAME || l->lb_str == NULL ||
148 l->lb_str[0] != s[0] ||
149 strcmp(l->lb_str, s) != 0)
150 continue;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000151#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Eric Smith7c478942008-03-18 23:45:49 +0000152 if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
153 s[0] == 'p' && strcmp(s, "print") == 0) {
154 break; /* no longer a keyword */
155 }
Thomas Wouters8ae12952006-02-28 22:42:15 +0000156#endif
157 D(printf("It's a keyword\n"));
158 return n - i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000159 }
160 }
161
162 {
163 register label *l = g->g_ll.ll_label;
164 register int i;
165 for (i = n; i > 0; i--, l++) {
166 if (l->lb_type == type && l->lb_str == NULL) {
167 D(printf("It's a token we know\n"));
168 return n - i;
169 }
170 }
171 }
172
173 D(printf("Illegal token\n"));
174 return -1;
175}
176
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000177#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000178static void
179future_hack(parser_state *ps)
180{
181 node *n = ps->p_stack.s_top->s_parent;
Georg Brandla10d3af2006-09-24 12:35:36 +0000182 node *ch, *cch;
Guido van Rossum3c033232001-07-19 15:27:45 +0000183 int i;
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000184
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000185 /* from __future__ import ..., must have at least 4 children */
186 n = CHILD(n, 0);
187 if (NCH(n) < 4)
188 return;
189 ch = CHILD(n, 0);
190 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000191 return;
192 ch = CHILD(n, 1);
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000193 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
194 strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000195 return;
Georg Brandla10d3af2006-09-24 12:35:36 +0000196 ch = CHILD(n, 3);
197 /* ch can be a star, a parenthesis or import_as_names */
198 if (TYPE(ch) == STAR)
199 return;
200 if (TYPE(ch) == LPAR)
201 ch = CHILD(n, 4);
202
203 for (i = 0; i < NCH(ch); i += 2) {
204 cch = CHILD(ch, i);
Christian Heimes3c608332008-03-26 22:01:37 +0000205 if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
206 char *str_ch = STR(CHILD(cch, 0));
207 if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
208 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
209 break;
210 } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
211 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
212 break;
213 } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
214 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
215 break;
216 }
Guido van Rossum3c033232001-07-19 15:27:45 +0000217 }
218 }
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000219}
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000220#endif /* future keyword */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000221
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000222int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000223PyParser_AddToken(register parser_state *ps, register int type, char *str,
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000224 int lineno, int col_offset, int *expected_ret)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000225{
226 register int ilabel;
Jeremy Hylton94988062000-06-20 19:10:44 +0000227 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000228
Guido van Rossum86bea461997-04-29 21:03:06 +0000229 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000230
231 /* Find out which label this token is */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000232 ilabel = classify(ps, type, str);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000233 if (ilabel < 0)
234 return E_SYNTAX;
235
236 /* Loop until the token is shifted or an error occurred */
237 for (;;) {
238 /* Fetch the current dfa and state */
239 register dfa *d = ps->p_stack.s_top->s_dfa;
240 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
241
242 D(printf(" DFA '%s', state %d:",
243 d->d_name, ps->p_stack.s_top->s_state));
244
245 /* Check accelerator */
246 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
247 register int x = s->s_accel[ilabel - s->s_lower];
248 if (x != -1) {
249 if (x & (1<<7)) {
250 /* Push non-terminal */
251 int nt = (x >> 8) + NT_OFFSET;
252 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000253 dfa *d1 = PyGrammar_FindDFA(
254 ps->p_grammar, nt);
Jeremy Hylton94988062000-06-20 19:10:44 +0000255 if ((err = push(&ps->p_stack, nt, d1,
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000256 arrow, lineno, col_offset)) > 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000257 D(printf(" MemError: push\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000258 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 }
260 D(printf(" Push ...\n"));
261 continue;
262 }
263
264 /* Shift the token */
Jeremy Hylton94988062000-06-20 19:10:44 +0000265 if ((err = shift(&ps->p_stack, type, str,
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000266 x, lineno, col_offset)) > 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 D(printf(" MemError: shift.\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000268 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000269 }
270 D(printf(" Shift.\n"));
271 /* Pop while we are in an accept-only state */
272 while (s = &d->d_state
273 [ps->p_stack.s_top->s_state],
274 s->s_accept && s->s_narcs == 1) {
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000275 D(printf(" DFA '%s', state %d: "
276 "Direct pop.\n",
277 d->d_name,
278 ps->p_stack.s_top->s_state));
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000279#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000280 if (d->d_name[0] == 'i' &&
281 strcmp(d->d_name,
282 "import_stmt") == 0)
283 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000284#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000285 s_pop(&ps->p_stack);
286 if (s_empty(&ps->p_stack)) {
287 D(printf(" ACCEPT.\n"));
288 return E_DONE;
289 }
290 d = ps->p_stack.s_top->s_dfa;
291 }
292 return E_OK;
293 }
294 }
295
296 if (s->s_accept) {
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000297#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000298 if (d->d_name[0] == 'i' &&
299 strcmp(d->d_name, "import_stmt") == 0)
300 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000301#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000302 /* Pop this dfa and try again */
303 s_pop(&ps->p_stack);
304 D(printf(" Pop ...\n"));
305 if (s_empty(&ps->p_stack)) {
306 D(printf(" Error: bottom of stack.\n"));
307 return E_SYNTAX;
308 }
309 continue;
310 }
311
312 /* Stuck, report syntax error */
313 D(printf(" Error.\n"));
Fred Drake85f36392000-07-11 17:53:00 +0000314 if (expected_ret) {
315 if (s->s_lower == s->s_upper - 1) {
316 /* Only one possible expected token */
317 *expected_ret = ps->p_grammar->
318 g_ll.ll_label[s->s_lower].lb_type;
319 }
320 else
321 *expected_ret = -1;
322 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000323 return E_SYNTAX;
324 }
325}
326
327
Guido van Rossum408027e1996-12-30 16:17:54 +0000328#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000329
330/* DEBUG OUTPUT */
331
332void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000333dumptree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334{
335 int i;
336
337 if (n == NULL)
338 printf("NIL");
339 else {
340 label l;
341 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000342 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000343 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 if (ISNONTERMINAL(TYPE(n))) {
345 printf("(");
346 for (i = 0; i < NCH(n); i++) {
347 if (i > 0)
348 printf(",");
349 dumptree(g, CHILD(n, i));
350 }
351 printf(")");
352 }
353 }
354}
355
356void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000357showtree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000358{
359 int i;
360
361 if (n == NULL)
362 return;
363 if (ISNONTERMINAL(TYPE(n))) {
364 for (i = 0; i < NCH(n); i++)
365 showtree(g, CHILD(n, i));
366 }
367 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000368 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000369 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
370 printf("(%s)", STR(n));
371 printf(" ");
372 }
373 else
374 printf("? ");
375}
376
377void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000378printtree(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379{
Guido van Rossum86bea461997-04-29 21:03:06 +0000380 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000381 printf("Parse tree:\n");
382 dumptree(ps->p_grammar, ps->p_tree);
383 printf("\n");
384 printf("Tokens:\n");
385 showtree(ps->p_grammar, ps->p_tree);
386 printf("\n");
387 }
388 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000389 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000390 printf("\n");
391}
392
Guido van Rossum408027e1996-12-30 16:17:54 +0000393#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000394
395/*
396
397Description
398-----------
399
400The parser's interface is different than usual: the function addtoken()
401must be called for each token in the input. This makes it possible to
402turn it into an incremental parsing system later. The parsing system
403constructs a parse tree as it goes.
404
405A parsing rule is represented as a Deterministic Finite-state Automaton
406(DFA). A node in a DFA represents a state of the parser; an arc represents
407a transition. Transitions are either labeled with terminal symbols or
408with non-terminals. When the parser decides to follow an arc labeled
409with a non-terminal, it is invoked recursively with the DFA representing
410the parsing rule for that as its initial state; when that DFA accepts,
411the parser that invoked it continues. The parse tree constructed by the
412recursively called parser is inserted as a child in the current parse tree.
413
414The DFA's can be constructed automatically from a more conventional
415language description. An extended LL(1) grammar (ELL(1)) is suitable.
416Certain restrictions make the parser's life easier: rules that can produce
417the empty string should be outlawed (there are other ways to put loops
418or optional parts in the language). To avoid the need to construct
419FIRST sets, we can require that all but the last alternative of a rule
420(really: arc going out of a DFA's state) must begin with a terminal
421symbol.
422
423As an example, consider this grammar:
424
425expr: term (OP term)*
426term: CONSTANT | '(' expr ')'
427
428The DFA corresponding to the rule for expr is:
429
430------->.---term-->.------->
431 ^ |
432 | |
433 \----OP----/
434
435The parse tree generated for the input a+b is:
436
437(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
438
439*/