blob: a9916d392aabb28fa4ce0401e8d87aefc75fce5b [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Parser implementation */
3
4/* For a description, see the comments at end of this file */
5
6/* XXX To do: error recovery */
7
Tim Peters1ca12962001-12-04 03:18:48 +00008#include "Python.h"
Guido van Rossum3f5da241990-12-20 15:06:42 +00009#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "token.h"
11#include "grammar.h"
12#include "node.h"
13#include "parser.h"
14#include "errcode.h"
15
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000016
Guido van Rossum408027e1996-12-30 16:17:54 +000017#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000018extern int Py_DebugFlag;
19#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000020#else
21#define D(x)
22#endif
23
24
25/* STACK DATA TYPE */
26
Tim Petersdbd9ba62000-07-09 03:09:57 +000027static void s_reset(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
29static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000030s_reset(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000032 s->s_top = &s->s_base[MAXSTACK];
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000033}
34
35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037static int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020038s_push(stack *s, dfa *d, node *parent)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020040 stackentry *top;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000041 if (s->s_top == s->s_base) {
42 fprintf(stderr, "s_push: parser stack overflow\n");
43 return E_NOMEM;
44 }
45 top = --s->s_top;
46 top->s_dfa = d;
47 top->s_parent = parent;
48 top->s_state = 0;
49 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000050}
51
Guido van Rossum408027e1996-12-30 16:17:54 +000052#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054static void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020055s_pop(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000057 if (s_empty(s))
58 Py_FatalError("s_pop: parser stack underflow -- FATAL");
59 s->s_top++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000060}
61
Guido van Rossum408027e1996-12-30 16:17:54 +000062#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063
64#define s_pop(s) (s)->s_top++
65
66#endif
67
68
69/* PARSER CREATION */
70
71parser_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +000072PyParser_New(grammar *g, int start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 parser_state *ps;
75
76 if (!g->g_accel)
77 PyGrammar_AddAccelerators(g);
78 ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
79 if (ps == NULL)
80 return NULL;
81 ps->p_grammar = g;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +000082#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 ps->p_flags = 0;
Neil Schemenauerc155dd42002-03-22 23:38:11 +000084#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 ps->p_tree = PyNode_New(start);
86 if (ps->p_tree == NULL) {
87 PyMem_FREE(ps);
88 return NULL;
89 }
90 s_reset(&ps->p_stack);
91 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
92 return ps;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000093}
94
95void
Thomas Wouters23c9e002000-07-22 19:20:54 +000096PyParser_Delete(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 /* NB If you want to save the parse tree,
99 you must set p_tree to NULL before calling delparser! */
100 PyNode_Free(ps->p_tree);
101 PyMem_FREE(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
104
105/* PARSER STACK OPERATIONS */
106
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107static int
Ivan Levkivskyi9932a222019-01-22 11:18:22 +0000108shift(stack *s, int type, char *str, int newstate, int lineno, int col_offset,
109 int end_lineno, int end_col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000110{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 int err;
112 assert(!s_empty(s));
Ivan Levkivskyi9932a222019-01-22 11:18:22 +0000113 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset,
114 end_lineno, end_col_offset);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 if (err)
116 return err;
117 s->s_top->s_state = newstate;
118 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000119}
120
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121static int
Ivan Levkivskyi9932a222019-01-22 11:18:22 +0000122push(stack *s, int type, dfa *d, int newstate, int lineno, int col_offset,
123 int end_lineno, int end_col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 int err;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200126 node *n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 n = s->s_top->s_parent;
128 assert(!s_empty(s));
Ivan Levkivskyi9932a222019-01-22 11:18:22 +0000129 err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset,
130 end_lineno, end_col_offset);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 if (err)
132 return err;
133 s->s_top->s_state = newstate;
134 return s_push(s, d, CHILD(n, NCH(n)-1));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000135}
136
137
138/* PARSER PROPER */
139
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140static int
Serhiy Storchakac6792272013-10-19 21:03:34 +0300141classify(parser_state *ps, int type, const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000142{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 grammar *g = ps->p_grammar;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200144 int n = g->g_ll.ll_nlabels;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145
146 if (type == NAME) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200147 label *l = g->g_ll.ll_label;
148 int i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 for (i = n; i > 0; i--, l++) {
150 if (l->lb_type != NAME || l->lb_str == NULL ||
Berker Peksag2a65ecb2016-03-28 00:45:28 +0300151 l->lb_str[0] != str[0] ||
152 strcmp(l->lb_str, str) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000153 continue;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000154#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Brett Cannone3944a52009-04-01 05:08:41 +0000155#if 0
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* Leaving this in as an example */
157 if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
Berker Peksag2a65ecb2016-03-28 00:45:28 +0300158 if (str[0] == 'w' && strcmp(str, "with") == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 break; /* not a keyword yet */
Berker Peksag2a65ecb2016-03-28 00:45:28 +0300160 else if (str[0] == 'a' && strcmp(str, "as") == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 break; /* not a keyword yet */
162 }
Thomas Wouters8ae12952006-02-28 22:42:15 +0000163#endif
Brett Cannone3944a52009-04-01 05:08:41 +0000164#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 D(printf("It's a keyword\n"));
166 return n - i;
167 }
168 }
169
170 {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200171 label *l = g->g_ll.ll_label;
172 int i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 for (i = n; i > 0; i--, l++) {
174 if (l->lb_type == type && l->lb_str == NULL) {
175 D(printf("It's a token we know\n"));
176 return n - i;
177 }
178 }
179 }
180
181 D(printf("Illegal token\n"));
182 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000183}
184
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000185#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Brett Cannone3944a52009-04-01 05:08:41 +0000186#if 0
Guido van Rossum45aecf42006-03-15 04:58:47 +0000187/* Leaving this in as an example */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000188static void
189future_hack(parser_state *ps)
190{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 node *n = ps->p_stack.s_top->s_parent;
192 node *ch, *cch;
193 int i;
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 /* from __future__ import ..., must have at least 4 children */
196 n = CHILD(n, 0);
197 if (NCH(n) < 4)
198 return;
199 ch = CHILD(n, 0);
200 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
201 return;
202 ch = CHILD(n, 1);
203 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
204 strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
205 return;
206 ch = CHILD(n, 3);
207 /* ch can be a star, a parenthesis or import_as_names */
208 if (TYPE(ch) == STAR)
209 return;
210 if (TYPE(ch) == LPAR)
211 ch = CHILD(n, 4);
212
213 for (i = 0; i < NCH(ch); i += 2) {
214 cch = CHILD(ch, i);
215 if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
216 char *str_ch = STR(CHILD(cch, 0));
217 if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
218 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
219 } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
220 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
221 } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
222 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
223 }
224 }
225 }
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000226}
Brett Cannone3944a52009-04-01 05:08:41 +0000227#endif
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000228#endif /* future keyword */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000229
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000230int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200231PyParser_AddToken(parser_state *ps, int type, char *str,
Ivan Levkivskyi9932a222019-01-22 11:18:22 +0000232 int lineno, int col_offset,
233 int end_lineno, int end_col_offset,
234 int *expected_ret)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000235{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200236 int ilabel;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 int err;
238
239 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
240
241 /* Find out which label this token is */
242 ilabel = classify(ps, type, str);
243 if (ilabel < 0)
244 return E_SYNTAX;
245
246 /* Loop until the token is shifted or an error occurred */
247 for (;;) {
248 /* Fetch the current dfa and state */
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200249 dfa *d = ps->p_stack.s_top->s_dfa;
250 state *s = &d->d_state[ps->p_stack.s_top->s_state];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251
252 D(printf(" DFA '%s', state %d:",
253 d->d_name, ps->p_stack.s_top->s_state));
254
255 /* Check accelerator */
256 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200257 int x = s->s_accel[ilabel - s->s_lower];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 if (x != -1) {
259 if (x & (1<<7)) {
260 /* Push non-terminal */
261 int nt = (x >> 8) + NT_OFFSET;
262 int arrow = x & ((1<<7)-1);
263 dfa *d1 = PyGrammar_FindDFA(
264 ps->p_grammar, nt);
265 if ((err = push(&ps->p_stack, nt, d1,
Ivan Levkivskyi9932a222019-01-22 11:18:22 +0000266 arrow, lineno, col_offset,
267 end_lineno, end_col_offset)) > 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 D(printf(" MemError: push\n"));
269 return err;
270 }
271 D(printf(" Push ...\n"));
272 continue;
273 }
274
275 /* Shift the token */
276 if ((err = shift(&ps->p_stack, type, str,
Ivan Levkivskyi9932a222019-01-22 11:18:22 +0000277 x, lineno, col_offset,
278 end_lineno, end_col_offset)) > 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 D(printf(" MemError: shift.\n"));
280 return err;
281 }
282 D(printf(" Shift.\n"));
283 /* Pop while we are in an accept-only state */
284 while (s = &d->d_state
285 [ps->p_stack.s_top->s_state],
286 s->s_accept && s->s_narcs == 1) {
287 D(printf(" DFA '%s', state %d: "
288 "Direct pop.\n",
289 d->d_name,
290 ps->p_stack.s_top->s_state));
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000291#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Brett Cannone3944a52009-04-01 05:08:41 +0000292#if 0
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 if (d->d_name[0] == 'i' &&
294 strcmp(d->d_name,
295 "import_stmt") == 0)
296 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000297#endif
Brett Cannone3944a52009-04-01 05:08:41 +0000298#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 s_pop(&ps->p_stack);
300 if (s_empty(&ps->p_stack)) {
301 D(printf(" ACCEPT.\n"));
302 return E_DONE;
303 }
304 d = ps->p_stack.s_top->s_dfa;
305 }
306 return E_OK;
307 }
308 }
309
310 if (s->s_accept) {
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000311#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Brett Cannone3944a52009-04-01 05:08:41 +0000312#if 0
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000313 if (d->d_name[0] == 'i' &&
314 strcmp(d->d_name, "import_stmt") == 0)
315 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000316#endif
Brett Cannone3944a52009-04-01 05:08:41 +0000317#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 /* Pop this dfa and try again */
319 s_pop(&ps->p_stack);
320 D(printf(" Pop ...\n"));
321 if (s_empty(&ps->p_stack)) {
322 D(printf(" Error: bottom of stack.\n"));
323 return E_SYNTAX;
324 }
325 continue;
326 }
327
328 /* Stuck, report syntax error */
329 D(printf(" Error.\n"));
330 if (expected_ret) {
331 if (s->s_lower == s->s_upper - 1) {
332 /* Only one possible expected token */
333 *expected_ret = ps->p_grammar->
334 g_ll.ll_label[s->s_lower].lb_type;
335 }
336 else
337 *expected_ret = -1;
338 }
339 return E_SYNTAX;
340 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000341}
342
343
Guido van Rossum408027e1996-12-30 16:17:54 +0000344#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000345
346/* DEBUG OUTPUT */
347
348void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000349dumptree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 int i;
352
353 if (n == NULL)
354 printf("NIL");
355 else {
356 label l;
357 l.lb_type = TYPE(n);
358 l.lb_str = STR(n);
359 printf("%s", PyGrammar_LabelRepr(&l));
360 if (ISNONTERMINAL(TYPE(n))) {
361 printf("(");
362 for (i = 0; i < NCH(n); i++) {
363 if (i > 0)
364 printf(",");
365 dumptree(g, CHILD(n, i));
366 }
367 printf(")");
368 }
369 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000370}
371
372void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000373showtree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000374{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 int i;
376
377 if (n == NULL)
378 return;
379 if (ISNONTERMINAL(TYPE(n))) {
380 for (i = 0; i < NCH(n); i++)
381 showtree(g, CHILD(n, i));
382 }
383 else if (ISTERMINAL(TYPE(n))) {
384 printf("%s", _PyParser_TokenNames[TYPE(n)]);
385 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
386 printf("(%s)", STR(n));
387 printf(" ");
388 }
389 else
390 printf("? ");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391}
392
393void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000394printtree(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000395{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 if (Py_DebugFlag) {
397 printf("Parse tree:\n");
398 dumptree(ps->p_grammar, ps->p_tree);
399 printf("\n");
400 printf("Tokens:\n");
401 showtree(ps->p_grammar, ps->p_tree);
402 printf("\n");
403 }
404 printf("Listing:\n");
405 PyNode_ListTree(ps->p_tree);
406 printf("\n");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000407}
408
Guido van Rossum408027e1996-12-30 16:17:54 +0000409#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410
411/*
412
413Description
414-----------
415
416The parser's interface is different than usual: the function addtoken()
417must be called for each token in the input. This makes it possible to
418turn it into an incremental parsing system later. The parsing system
419constructs a parse tree as it goes.
420
421A parsing rule is represented as a Deterministic Finite-state Automaton
422(DFA). A node in a DFA represents a state of the parser; an arc represents
423a transition. Transitions are either labeled with terminal symbols or
424with non-terminals. When the parser decides to follow an arc labeled
425with a non-terminal, it is invoked recursively with the DFA representing
426the parsing rule for that as its initial state; when that DFA accepts,
427the parser that invoked it continues. The parse tree constructed by the
428recursively called parser is inserted as a child in the current parse tree.
429
430The DFA's can be constructed automatically from a more conventional
431language description. An extended LL(1) grammar (ELL(1)) is suitable.
432Certain restrictions make the parser's life easier: rules that can produce
433the empty string should be outlawed (there are other ways to put loops
434or optional parts in the language). To avoid the need to construct
435FIRST sets, we can require that all but the last alternative of a rule
436(really: arc going out of a DFA's state) must begin with a terminal
437symbol.
438
439As an example, consider this grammar:
440
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441expr: term (OP term)*
442term: CONSTANT | '(' expr ')'
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000443
444The DFA corresponding to the rule for expr is:
445
446------->.---term-->.------->
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000447 ^ |
448 | |
449 \----OP----/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450
451The parse tree generated for the input a+b is:
452
453(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
454
455*/