blob: 41072c478c26df90018a9e20cb4adf6efb1e1e38 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Parser implementation */
3
4/* For a description, see the comments at end of this file */
5
6/* XXX To do: error recovery */
7
Tim Peters1ca12962001-12-04 03:18:48 +00008#include "Python.h"
Guido van Rossum3f5da241990-12-20 15:06:42 +00009#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "token.h"
11#include "grammar.h"
12#include "node.h"
13#include "parser.h"
14#include "errcode.h"
15
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000016
Guido van Rossum408027e1996-12-30 16:17:54 +000017#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000018extern int Py_DebugFlag;
19#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000020#else
21#define D(x)
22#endif
23
24
25/* STACK DATA TYPE */
26
Tim Petersdbd9ba62000-07-09 03:09:57 +000027static void s_reset(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
29static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000030s_reset(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000032 s->s_top = &s->s_base[MAXSTACK];
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000033}
34
35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037static int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020038s_push(stack *s, dfa *d, node *parent)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020040 stackentry *top;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000041 if (s->s_top == s->s_base) {
42 fprintf(stderr, "s_push: parser stack overflow\n");
43 return E_NOMEM;
44 }
45 top = --s->s_top;
46 top->s_dfa = d;
47 top->s_parent = parent;
48 top->s_state = 0;
49 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000050}
51
Guido van Rossum408027e1996-12-30 16:17:54 +000052#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054static void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020055s_pop(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000057 if (s_empty(s))
58 Py_FatalError("s_pop: parser stack underflow -- FATAL");
59 s->s_top++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000060}
61
Guido van Rossum408027e1996-12-30 16:17:54 +000062#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063
64#define s_pop(s) (s)->s_top++
65
66#endif
67
68
69/* PARSER CREATION */
70
71parser_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +000072PyParser_New(grammar *g, int start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 parser_state *ps;
75
76 if (!g->g_accel)
77 PyGrammar_AddAccelerators(g);
78 ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
79 if (ps == NULL)
80 return NULL;
81 ps->p_grammar = g;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +000082#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 ps->p_flags = 0;
Neil Schemenauerc155dd42002-03-22 23:38:11 +000084#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 ps->p_tree = PyNode_New(start);
86 if (ps->p_tree == NULL) {
87 PyMem_FREE(ps);
88 return NULL;
89 }
90 s_reset(&ps->p_stack);
91 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
92 return ps;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000093}
94
95void
Thomas Wouters23c9e002000-07-22 19:20:54 +000096PyParser_Delete(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 /* NB If you want to save the parse tree,
99 you must set p_tree to NULL before calling delparser! */
100 PyNode_Free(ps->p_tree);
101 PyMem_FREE(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
104
105/* PARSER STACK OPERATIONS */
106
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107static int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200108shift(stack *s, int type, char *str, int newstate, int lineno, int col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 int err;
111 assert(!s_empty(s));
112 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
113 if (err)
114 return err;
115 s->s_top->s_state = newstate;
116 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000117}
118
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000119static int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200120push(stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 int err;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200123 node *n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 n = s->s_top->s_parent;
125 assert(!s_empty(s));
126 err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
127 if (err)
128 return err;
129 s->s_top->s_state = newstate;
130 return s_push(s, d, CHILD(n, NCH(n)-1));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000131}
132
133
134/* PARSER PROPER */
135
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136static int
Serhiy Storchakac6792272013-10-19 21:03:34 +0300137classify(parser_state *ps, int type, const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000138{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 grammar *g = ps->p_grammar;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200140 int n = g->g_ll.ll_nlabels;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141
142 if (type == NAME) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200143 label *l = g->g_ll.ll_label;
144 int i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 for (i = n; i > 0; i--, l++) {
146 if (l->lb_type != NAME || l->lb_str == NULL ||
Berker Peksag2a65ecb2016-03-28 00:45:28 +0300147 l->lb_str[0] != str[0] ||
148 strcmp(l->lb_str, str) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 continue;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000150#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Brett Cannone3944a52009-04-01 05:08:41 +0000151#if 0
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 /* Leaving this in as an example */
153 if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
Berker Peksag2a65ecb2016-03-28 00:45:28 +0300154 if (str[0] == 'w' && strcmp(str, "with") == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 break; /* not a keyword yet */
Berker Peksag2a65ecb2016-03-28 00:45:28 +0300156 else if (str[0] == 'a' && strcmp(str, "as") == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000157 break; /* not a keyword yet */
158 }
Thomas Wouters8ae12952006-02-28 22:42:15 +0000159#endif
Brett Cannone3944a52009-04-01 05:08:41 +0000160#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 D(printf("It's a keyword\n"));
162 return n - i;
163 }
164 }
165
166 {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200167 label *l = g->g_ll.ll_label;
168 int i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 for (i = n; i > 0; i--, l++) {
170 if (l->lb_type == type && l->lb_str == NULL) {
171 D(printf("It's a token we know\n"));
172 return n - i;
173 }
174 }
175 }
176
177 D(printf("Illegal token\n"));
178 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179}
180
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000181#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Brett Cannone3944a52009-04-01 05:08:41 +0000182#if 0
Guido van Rossum45aecf42006-03-15 04:58:47 +0000183/* Leaving this in as an example */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000184static void
185future_hack(parser_state *ps)
186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 node *n = ps->p_stack.s_top->s_parent;
188 node *ch, *cch;
189 int i;
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 /* from __future__ import ..., must have at least 4 children */
192 n = CHILD(n, 0);
193 if (NCH(n) < 4)
194 return;
195 ch = CHILD(n, 0);
196 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
197 return;
198 ch = CHILD(n, 1);
199 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
200 strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
201 return;
202 ch = CHILD(n, 3);
203 /* ch can be a star, a parenthesis or import_as_names */
204 if (TYPE(ch) == STAR)
205 return;
206 if (TYPE(ch) == LPAR)
207 ch = CHILD(n, 4);
208
209 for (i = 0; i < NCH(ch); i += 2) {
210 cch = CHILD(ch, i);
211 if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
212 char *str_ch = STR(CHILD(cch, 0));
213 if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
214 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
215 } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
216 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
217 } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
218 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
219 }
220 }
221 }
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000222}
Brett Cannone3944a52009-04-01 05:08:41 +0000223#endif
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000224#endif /* future keyword */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000225
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200227PyParser_AddToken(parser_state *ps, int type, char *str,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 int lineno, int col_offset, int *expected_ret)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200230 int ilabel;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 int err;
232
233 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
234
235 /* Find out which label this token is */
236 ilabel = classify(ps, type, str);
237 if (ilabel < 0)
238 return E_SYNTAX;
239
240 /* Loop until the token is shifted or an error occurred */
241 for (;;) {
242 /* Fetch the current dfa and state */
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200243 dfa *d = ps->p_stack.s_top->s_dfa;
244 state *s = &d->d_state[ps->p_stack.s_top->s_state];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000245
246 D(printf(" DFA '%s', state %d:",
247 d->d_name, ps->p_stack.s_top->s_state));
248
249 /* Check accelerator */
250 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200251 int x = s->s_accel[ilabel - s->s_lower];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 if (x != -1) {
253 if (x & (1<<7)) {
254 /* Push non-terminal */
255 int nt = (x >> 8) + NT_OFFSET;
256 int arrow = x & ((1<<7)-1);
257 dfa *d1 = PyGrammar_FindDFA(
258 ps->p_grammar, nt);
259 if ((err = push(&ps->p_stack, nt, d1,
260 arrow, lineno, col_offset)) > 0) {
261 D(printf(" MemError: push\n"));
262 return err;
263 }
264 D(printf(" Push ...\n"));
265 continue;
266 }
267
268 /* Shift the token */
269 if ((err = shift(&ps->p_stack, type, str,
270 x, lineno, col_offset)) > 0) {
271 D(printf(" MemError: shift.\n"));
272 return err;
273 }
274 D(printf(" Shift.\n"));
275 /* Pop while we are in an accept-only state */
276 while (s = &d->d_state
277 [ps->p_stack.s_top->s_state],
278 s->s_accept && s->s_narcs == 1) {
279 D(printf(" DFA '%s', state %d: "
280 "Direct pop.\n",
281 d->d_name,
282 ps->p_stack.s_top->s_state));
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000283#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Brett Cannone3944a52009-04-01 05:08:41 +0000284#if 0
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 if (d->d_name[0] == 'i' &&
286 strcmp(d->d_name,
287 "import_stmt") == 0)
288 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000289#endif
Brett Cannone3944a52009-04-01 05:08:41 +0000290#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 s_pop(&ps->p_stack);
292 if (s_empty(&ps->p_stack)) {
293 D(printf(" ACCEPT.\n"));
294 return E_DONE;
295 }
296 d = ps->p_stack.s_top->s_dfa;
297 }
298 return E_OK;
299 }
300 }
301
302 if (s->s_accept) {
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000303#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Brett Cannone3944a52009-04-01 05:08:41 +0000304#if 0
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 if (d->d_name[0] == 'i' &&
306 strcmp(d->d_name, "import_stmt") == 0)
307 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000308#endif
Brett Cannone3944a52009-04-01 05:08:41 +0000309#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 /* Pop this dfa and try again */
311 s_pop(&ps->p_stack);
312 D(printf(" Pop ...\n"));
313 if (s_empty(&ps->p_stack)) {
314 D(printf(" Error: bottom of stack.\n"));
315 return E_SYNTAX;
316 }
317 continue;
318 }
319
320 /* Stuck, report syntax error */
321 D(printf(" Error.\n"));
322 if (expected_ret) {
323 if (s->s_lower == s->s_upper - 1) {
324 /* Only one possible expected token */
325 *expected_ret = ps->p_grammar->
326 g_ll.ll_label[s->s_lower].lb_type;
327 }
328 else
329 *expected_ret = -1;
330 }
331 return E_SYNTAX;
332 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000333}
334
335
Guido van Rossum408027e1996-12-30 16:17:54 +0000336#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337
338/* DEBUG OUTPUT */
339
340void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000341dumptree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000342{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000343 int i;
344
345 if (n == NULL)
346 printf("NIL");
347 else {
348 label l;
349 l.lb_type = TYPE(n);
350 l.lb_str = STR(n);
351 printf("%s", PyGrammar_LabelRepr(&l));
352 if (ISNONTERMINAL(TYPE(n))) {
353 printf("(");
354 for (i = 0; i < NCH(n); i++) {
355 if (i > 0)
356 printf(",");
357 dumptree(g, CHILD(n, i));
358 }
359 printf(")");
360 }
361 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000362}
363
364void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000365showtree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 int i;
368
369 if (n == NULL)
370 return;
371 if (ISNONTERMINAL(TYPE(n))) {
372 for (i = 0; i < NCH(n); i++)
373 showtree(g, CHILD(n, i));
374 }
375 else if (ISTERMINAL(TYPE(n))) {
376 printf("%s", _PyParser_TokenNames[TYPE(n)]);
377 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
378 printf("(%s)", STR(n));
379 printf(" ");
380 }
381 else
382 printf("? ");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000383}
384
385void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000386printtree(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000388 if (Py_DebugFlag) {
389 printf("Parse tree:\n");
390 dumptree(ps->p_grammar, ps->p_tree);
391 printf("\n");
392 printf("Tokens:\n");
393 showtree(ps->p_grammar, ps->p_tree);
394 printf("\n");
395 }
396 printf("Listing:\n");
397 PyNode_ListTree(ps->p_tree);
398 printf("\n");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000399}
400
Guido van Rossum408027e1996-12-30 16:17:54 +0000401#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402
403/*
404
405Description
406-----------
407
408The parser's interface is different than usual: the function addtoken()
409must be called for each token in the input. This makes it possible to
410turn it into an incremental parsing system later. The parsing system
411constructs a parse tree as it goes.
412
413A parsing rule is represented as a Deterministic Finite-state Automaton
414(DFA). A node in a DFA represents a state of the parser; an arc represents
415a transition. Transitions are either labeled with terminal symbols or
416with non-terminals. When the parser decides to follow an arc labeled
417with a non-terminal, it is invoked recursively with the DFA representing
418the parsing rule for that as its initial state; when that DFA accepts,
419the parser that invoked it continues. The parse tree constructed by the
420recursively called parser is inserted as a child in the current parse tree.
421
422The DFA's can be constructed automatically from a more conventional
423language description. An extended LL(1) grammar (ELL(1)) is suitable.
424Certain restrictions make the parser's life easier: rules that can produce
425the empty string should be outlawed (there are other ways to put loops
426or optional parts in the language). To avoid the need to construct
427FIRST sets, we can require that all but the last alternative of a rule
428(really: arc going out of a DFA's state) must begin with a terminal
429symbol.
430
431As an example, consider this grammar:
432
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000433expr: term (OP term)*
434term: CONSTANT | '(' expr ')'
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000435
436The DFA corresponding to the rule for expr is:
437
438------->.---term-->.------->
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000439 ^ |
440 | |
441 \----OP----/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000442
443The parse tree generated for the input a+b is:
444
445(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
446
447*/