blob: b753a177c8be703c2457f3ee0e52f62657e48921 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Parser implementation */
3
4/* For a description, see the comments at end of this file */
5
6/* XXX To do: error recovery */
7
Tim Peters1ca12962001-12-04 03:18:48 +00008#include "Python.h"
Guido van Rossum3f5da241990-12-20 15:06:42 +00009#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "token.h"
11#include "grammar.h"
12#include "node.h"
13#include "parser.h"
14#include "errcode.h"
15
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000016
Guido van Rossum408027e1996-12-30 16:17:54 +000017#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000018extern int Py_DebugFlag;
19#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000020#else
21#define D(x)
22#endif
23
24
25/* STACK DATA TYPE */
26
Tim Petersdbd9ba62000-07-09 03:09:57 +000027static void s_reset(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
29static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000030s_reset(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000032 s->s_top = &s->s_base[MAXSTACK];
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000033}
34
35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037static int
Thomas Wouters23c9e002000-07-22 19:20:54 +000038s_push(register stack *s, dfa *d, node *parent)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000040 register stackentry *top;
41 if (s->s_top == s->s_base) {
42 fprintf(stderr, "s_push: parser stack overflow\n");
43 return E_NOMEM;
44 }
45 top = --s->s_top;
46 top->s_dfa = d;
47 top->s_parent = parent;
48 top->s_state = 0;
49 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000050}
51
Guido van Rossum408027e1996-12-30 16:17:54 +000052#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000055s_pop(register stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000057 if (s_empty(s))
58 Py_FatalError("s_pop: parser stack underflow -- FATAL");
59 s->s_top++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000060}
61
Guido van Rossum408027e1996-12-30 16:17:54 +000062#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063
64#define s_pop(s) (s)->s_top++
65
66#endif
67
68
69/* PARSER CREATION */
70
71parser_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +000072PyParser_New(grammar *g, int start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000074 parser_state *ps;
75
76 if (!g->g_accel)
77 PyGrammar_AddAccelerators(g);
78 ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
79 if (ps == NULL)
80 return NULL;
81 ps->p_grammar = g;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +000082#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouc83ea132010-05-09 14:46:46 +000083 ps->p_flags = 0;
Neil Schemenauerc155dd42002-03-22 23:38:11 +000084#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000085 ps->p_tree = PyNode_New(start);
86 if (ps->p_tree == NULL) {
87 PyMem_FREE(ps);
88 return NULL;
89 }
90 s_reset(&ps->p_stack);
91 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
92 return ps;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000093}
94
95void
Thomas Wouters23c9e002000-07-22 19:20:54 +000096PyParser_Delete(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000098 /* NB If you want to save the parse tree,
99 you must set p_tree to NULL before calling delparser! */
100 PyNode_Free(ps->p_tree);
101 PyMem_FREE(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
104
105/* PARSER STACK OPERATIONS */
106
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107static int
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000108shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000110 int err;
111 assert(!s_empty(s));
112 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
113 if (err)
114 return err;
115 s->s_top->s_state = newstate;
116 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000117}
118
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000119static int
Martin v. Löwis49c5da12006-03-01 22:49:05 +0000120push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000122 int err;
123 register node *n;
124 n = s->s_top->s_parent;
125 assert(!s_empty(s));
126 err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
127 if (err)
128 return err;
129 s->s_top->s_state = newstate;
130 return s_push(s, d, CHILD(n, NCH(n)-1));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000131}
132
133
134/* PARSER PROPER */
135
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136static int
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000137classify(parser_state *ps, int type, char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000138{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000139 grammar *g = ps->p_grammar;
140 register int n = g->g_ll.ll_nlabels;
141
142 if (type == NAME) {
143 register char *s = str;
144 register label *l = g->g_ll.ll_label;
145 register int i;
146 for (i = n; i > 0; i--, l++) {
147 if (l->lb_type != NAME || l->lb_str == NULL ||
148 l->lb_str[0] != s[0] ||
149 strcmp(l->lb_str, s) != 0)
150 continue;
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000151#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000152 if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
153 s[0] == 'p' && strcmp(s, "print") == 0) {
154 break; /* no longer a keyword */
155 }
Thomas Wouters8ae12952006-02-28 22:42:15 +0000156#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000157 D(printf("It's a keyword\n"));
158 return n - i;
159 }
160 }
161
162 {
163 register label *l = g->g_ll.ll_label;
164 register int i;
165 for (i = n; i > 0; i--, l++) {
166 if (l->lb_type == type && l->lb_str == NULL) {
167 D(printf("It's a token we know\n"));
168 return n - i;
169 }
170 }
171 }
172
173 D(printf("Illegal token\n"));
174 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000175}
176
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000177#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000178static void
179future_hack(parser_state *ps)
180{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000181 node *n = ps->p_stack.s_top->s_parent;
182 node *ch, *cch;
183 int i;
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000184
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* from __future__ import ..., must have at least 4 children */
186 n = CHILD(n, 0);
187 if (NCH(n) < 4)
188 return;
189 ch = CHILD(n, 0);
190 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
191 return;
192 ch = CHILD(n, 1);
193 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
194 strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
195 return;
196 ch = CHILD(n, 3);
197 /* ch can be a star, a parenthesis or import_as_names */
198 if (TYPE(ch) == STAR)
199 return;
200 if (TYPE(ch) == LPAR)
201 ch = CHILD(n, 4);
202
203 for (i = 0; i < NCH(ch); i += 2) {
204 cch = CHILD(ch, i);
205 if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
206 char *str_ch = STR(CHILD(cch, 0));
207 if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
208 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
209 } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
210 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
211 } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
212 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
213 }
214 }
215 }
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000216}
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000217#endif /* future keyword */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000218
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000219int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000220PyParser_AddToken(register parser_state *ps, register int type, char *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000221 int lineno, int col_offset, int *expected_ret)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000222{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000223 register int ilabel;
224 int err;
225
226 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
227
228 /* Find out which label this token is */
229 ilabel = classify(ps, type, str);
230 if (ilabel < 0)
231 return E_SYNTAX;
232
233 /* Loop until the token is shifted or an error occurred */
234 for (;;) {
235 /* Fetch the current dfa and state */
236 register dfa *d = ps->p_stack.s_top->s_dfa;
237 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
238
239 D(printf(" DFA '%s', state %d:",
240 d->d_name, ps->p_stack.s_top->s_state));
241
242 /* Check accelerator */
243 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
244 register int x = s->s_accel[ilabel - s->s_lower];
245 if (x != -1) {
246 if (x & (1<<7)) {
247 /* Push non-terminal */
248 int nt = (x >> 8) + NT_OFFSET;
249 int arrow = x & ((1<<7)-1);
250 dfa *d1 = PyGrammar_FindDFA(
251 ps->p_grammar, nt);
252 if ((err = push(&ps->p_stack, nt, d1,
253 arrow, lineno, col_offset)) > 0) {
254 D(printf(" MemError: push\n"));
255 return err;
256 }
257 D(printf(" Push ...\n"));
258 continue;
259 }
260
261 /* Shift the token */
262 if ((err = shift(&ps->p_stack, type, str,
263 x, lineno, col_offset)) > 0) {
264 D(printf(" MemError: shift.\n"));
265 return err;
266 }
267 D(printf(" Shift.\n"));
268 /* Pop while we are in an accept-only state */
269 while (s = &d->d_state
270 [ps->p_stack.s_top->s_state],
271 s->s_accept && s->s_narcs == 1) {
272 D(printf(" DFA '%s', state %d: "
273 "Direct pop.\n",
274 d->d_name,
275 ps->p_stack.s_top->s_state));
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000276#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000277 if (d->d_name[0] == 'i' &&
278 strcmp(d->d_name,
279 "import_stmt") == 0)
280 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000281#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000282 s_pop(&ps->p_stack);
283 if (s_empty(&ps->p_stack)) {
284 D(printf(" ACCEPT.\n"));
285 return E_DONE;
286 }
287 d = ps->p_stack.s_top->s_dfa;
288 }
289 return E_OK;
290 }
291 }
292
293 if (s->s_accept) {
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000294#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000295 if (d->d_name[0] == 'i' &&
296 strcmp(d->d_name, "import_stmt") == 0)
297 future_hack(ps);
Neil Schemenauerc155dd42002-03-22 23:38:11 +0000298#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000299 /* Pop this dfa and try again */
300 s_pop(&ps->p_stack);
301 D(printf(" Pop ...\n"));
302 if (s_empty(&ps->p_stack)) {
303 D(printf(" Error: bottom of stack.\n"));
304 return E_SYNTAX;
305 }
306 continue;
307 }
308
309 /* Stuck, report syntax error */
310 D(printf(" Error.\n"));
311 if (expected_ret) {
312 if (s->s_lower == s->s_upper - 1) {
313 /* Only one possible expected token */
314 *expected_ret = ps->p_grammar->
315 g_ll.ll_label[s->s_lower].lb_type;
316 }
317 else
318 *expected_ret = -1;
319 }
320 return E_SYNTAX;
321 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322}
323
324
Guido van Rossum408027e1996-12-30 16:17:54 +0000325#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326
327/* DEBUG OUTPUT */
328
329void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000330dumptree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000331{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000332 int i;
333
334 if (n == NULL)
335 printf("NIL");
336 else {
337 label l;
338 l.lb_type = TYPE(n);
339 l.lb_str = STR(n);
340 printf("%s", PyGrammar_LabelRepr(&l));
341 if (ISNONTERMINAL(TYPE(n))) {
342 printf("(");
343 for (i = 0; i < NCH(n); i++) {
344 if (i > 0)
345 printf(",");
346 dumptree(g, CHILD(n, i));
347 }
348 printf(")");
349 }
350 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000351}
352
353void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000354showtree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000355{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000356 int i;
357
358 if (n == NULL)
359 return;
360 if (ISNONTERMINAL(TYPE(n))) {
361 for (i = 0; i < NCH(n); i++)
362 showtree(g, CHILD(n, i));
363 }
364 else if (ISTERMINAL(TYPE(n))) {
365 printf("%s", _PyParser_TokenNames[TYPE(n)]);
366 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
367 printf("(%s)", STR(n));
368 printf(" ");
369 }
370 else
371 printf("? ");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000372}
373
374void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000375printtree(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000376{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000377 if (Py_DebugFlag) {
378 printf("Parse tree:\n");
379 dumptree(ps->p_grammar, ps->p_tree);
380 printf("\n");
381 printf("Tokens:\n");
382 showtree(ps->p_grammar, ps->p_tree);
383 printf("\n");
384 }
385 printf("Listing:\n");
386 PyNode_ListTree(ps->p_tree);
387 printf("\n");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000388}
389
Guido van Rossum408027e1996-12-30 16:17:54 +0000390#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391
392/*
393
394Description
395-----------
396
397The parser's interface is different than usual: the function addtoken()
398must be called for each token in the input. This makes it possible to
399turn it into an incremental parsing system later. The parsing system
400constructs a parse tree as it goes.
401
402A parsing rule is represented as a Deterministic Finite-state Automaton
403(DFA). A node in a DFA represents a state of the parser; an arc represents
404a transition. Transitions are either labeled with terminal symbols or
405with non-terminals. When the parser decides to follow an arc labeled
406with a non-terminal, it is invoked recursively with the DFA representing
407the parsing rule for that as its initial state; when that DFA accepts,
408the parser that invoked it continues. The parse tree constructed by the
409recursively called parser is inserted as a child in the current parse tree.
410
411The DFA's can be constructed automatically from a more conventional
412language description. An extended LL(1) grammar (ELL(1)) is suitable.
413Certain restrictions make the parser's life easier: rules that can produce
414the empty string should be outlawed (there are other ways to put loops
415or optional parts in the language). To avoid the need to construct
416FIRST sets, we can require that all but the last alternative of a rule
417(really: arc going out of a DFA's state) must begin with a terminal
418symbol.
419
420As an example, consider this grammar:
421
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000422expr: term (OP term)*
423term: CONSTANT | '(' expr ')'
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000424
425The DFA corresponding to the rule for expr is:
426
427------->.---term-->.------->
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000428 ^ |
429 | |
430 \----OP----/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000431
432The parse tree generated for the input a+b is:
433
434(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
435
436*/