blob: 753c43ac9c9eb03234ac722c21361dbc11cec10c [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Parser implementation */
3
4/* For a description, see the comments at end of this file */
5
6/* XXX To do: error recovery */
7
Guido van Rossum3f5da241990-12-20 15:06:42 +00008#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "token.h"
11#include "grammar.h"
12#include "node.h"
13#include "parser.h"
14#include "errcode.h"
15
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000016
Guido van Rossum408027e1996-12-30 16:17:54 +000017#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000018extern int Py_DebugFlag;
19#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000020#else
21#define D(x)
22#endif
23
24
25/* STACK DATA TYPE */
26
Tim Petersdbd9ba62000-07-09 03:09:57 +000027static void s_reset(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
29static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000030s_reset(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031{
32 s->s_top = &s->s_base[MAXSTACK];
33}
34
35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037static int
Thomas Wouters23c9e002000-07-22 19:20:54 +000038s_push(register stack *s, dfa *d, node *parent)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039{
40 register stackentry *top;
41 if (s->s_top == s->s_base) {
42 fprintf(stderr, "s_push: parser stack overflow\n");
Guido van Rossume3c3b272000-10-02 10:21:59 +000043 return E_NOMEM;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000044 }
45 top = --s->s_top;
46 top->s_dfa = d;
47 top->s_parent = parent;
48 top->s_state = 0;
49 return 0;
50}
51
Guido van Rossum408027e1996-12-30 16:17:54 +000052#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000055s_pop(register stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Guido van Rossum588633d1994-12-30 15:46:02 +000057 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000058 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000059 s->s_top++;
60}
61
Guido van Rossum408027e1996-12-30 16:17:54 +000062#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063
64#define s_pop(s) (s)->s_top++
65
66#endif
67
68
69/* PARSER CREATION */
70
71parser_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +000072PyParser_New(grammar *g, int start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073{
74 parser_state *ps;
75
76 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +000077 PyGrammar_AddAccelerators(g);
78 ps = PyMem_NEW(parser_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000079 if (ps == NULL)
80 return NULL;
81 ps->p_grammar = g;
Guido van Rossumb09f7ed2001-07-15 21:08:29 +000082 ps->p_generators = 0;
Guido van Rossum86bea461997-04-29 21:03:06 +000083 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000084 if (ps->p_tree == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +000085 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000086 return NULL;
87 }
88 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +000089 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090 return ps;
91}
92
93void
Thomas Wouters23c9e002000-07-22 19:20:54 +000094PyParser_Delete(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Guido van Rossum99f02d41990-11-18 17:38:42 +000096 /* NB If you want to save the parse tree,
97 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +000098 PyNode_Free(ps->p_tree);
99 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
102
103/* PARSER STACK OPERATIONS */
104
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000106shift(register stack *s, int type, char *str, int newstate, int lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Jeremy Hylton94988062000-06-20 19:10:44 +0000108 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000110 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno);
111 if (err)
112 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000113 s->s_top->s_state = newstate;
114 return 0;
115}
116
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000117static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000118push(register stack *s, int type, dfa *d, int newstate, int lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000119{
Jeremy Hylton94988062000-06-20 19:10:44 +0000120 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121 register node *n;
122 n = s->s_top->s_parent;
123 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000124 err = PyNode_AddChild(n, type, (char *)NULL, lineno);
125 if (err)
126 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000127 s->s_top->s_state = newstate;
128 return s_push(s, d, CHILD(n, NCH(n)-1));
129}
130
131
132/* PARSER PROPER */
133
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000134static int
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000135classify(parser_state *ps, int type, char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136{
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000137 grammar *g = ps->p_grammar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000138 register int n = g->g_ll.ll_nlabels;
139
140 if (type == NAME) {
141 register char *s = str;
142 register label *l = g->g_ll.ll_label;
143 register int i;
144 for (i = n; i > 0; i--, l++) {
145 if (l->lb_type == NAME && l->lb_str != NULL &&
146 l->lb_str[0] == s[0] &&
147 strcmp(l->lb_str, s) == 0) {
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000148 if (!ps->p_generators &&
149 s[0] == 'y' &&
150 strcmp(s, "yield") == 0)
151 break; /* not a keyword */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152 D(printf("It's a keyword\n"));
153 return n - i;
154 }
155 }
156 }
157
158 {
159 register label *l = g->g_ll.ll_label;
160 register int i;
161 for (i = n; i > 0; i--, l++) {
162 if (l->lb_type == type && l->lb_str == NULL) {
163 D(printf("It's a token we know\n"));
164 return n - i;
165 }
166 }
167 }
168
169 D(printf("Illegal token\n"));
170 return -1;
171}
172
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000173static void
174future_hack(parser_state *ps)
175{
176 node *n = ps->p_stack.s_top->s_parent;
177 node *ch;
178
179 if (strcmp(STR(CHILD(n, 0)), "from") != 0)
180 return;
181 ch = CHILD(n, 1);
182 if (strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
183 return;
184 ch = CHILD(n, 3);
185 if (NCH(ch) == 1 && strcmp(STR(CHILD(ch, 0)), "generators") == 0)
186 ps->p_generators = 1;
187}
188
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000189int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000190PyParser_AddToken(register parser_state *ps, register int type, char *str,
191 int lineno, int *expected_ret)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192{
193 register int ilabel;
Jeremy Hylton94988062000-06-20 19:10:44 +0000194 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195
Guido van Rossum86bea461997-04-29 21:03:06 +0000196 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000197
198 /* Find out which label this token is */
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000199 ilabel = classify(ps, type, str);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000200 if (ilabel < 0)
201 return E_SYNTAX;
202
203 /* Loop until the token is shifted or an error occurred */
204 for (;;) {
205 /* Fetch the current dfa and state */
206 register dfa *d = ps->p_stack.s_top->s_dfa;
207 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
208
209 D(printf(" DFA '%s', state %d:",
210 d->d_name, ps->p_stack.s_top->s_state));
211
212 /* Check accelerator */
213 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
214 register int x = s->s_accel[ilabel - s->s_lower];
215 if (x != -1) {
216 if (x & (1<<7)) {
217 /* Push non-terminal */
218 int nt = (x >> 8) + NT_OFFSET;
219 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000220 dfa *d1 = PyGrammar_FindDFA(
221 ps->p_grammar, nt);
Jeremy Hylton94988062000-06-20 19:10:44 +0000222 if ((err = push(&ps->p_stack, nt, d1,
223 arrow, lineno)) > 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000224 D(printf(" MemError: push\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000225 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226 }
227 D(printf(" Push ...\n"));
228 continue;
229 }
230
231 /* Shift the token */
Jeremy Hylton94988062000-06-20 19:10:44 +0000232 if ((err = shift(&ps->p_stack, type, str,
233 x, lineno)) > 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234 D(printf(" MemError: shift.\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000235 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000236 }
237 D(printf(" Shift.\n"));
238 /* Pop while we are in an accept-only state */
239 while (s = &d->d_state
240 [ps->p_stack.s_top->s_state],
241 s->s_accept && s->s_narcs == 1) {
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000242 D(printf(" DFA '%s', state %d: "
243 "Direct pop.\n",
244 d->d_name,
245 ps->p_stack.s_top->s_state));
246 if (d->d_name[0] == 'i' &&
247 strcmp(d->d_name,
248 "import_stmt") == 0)
249 future_hack(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000250 s_pop(&ps->p_stack);
251 if (s_empty(&ps->p_stack)) {
252 D(printf(" ACCEPT.\n"));
253 return E_DONE;
254 }
255 d = ps->p_stack.s_top->s_dfa;
256 }
257 return E_OK;
258 }
259 }
260
261 if (s->s_accept) {
Guido van Rossumb09f7ed2001-07-15 21:08:29 +0000262 if (d->d_name[0] == 'i' &&
263 strcmp(d->d_name, "import_stmt") == 0)
264 future_hack(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265 /* Pop this dfa and try again */
266 s_pop(&ps->p_stack);
267 D(printf(" Pop ...\n"));
268 if (s_empty(&ps->p_stack)) {
269 D(printf(" Error: bottom of stack.\n"));
270 return E_SYNTAX;
271 }
272 continue;
273 }
274
275 /* Stuck, report syntax error */
276 D(printf(" Error.\n"));
Fred Drake85f36392000-07-11 17:53:00 +0000277 if (expected_ret) {
278 if (s->s_lower == s->s_upper - 1) {
279 /* Only one possible expected token */
280 *expected_ret = ps->p_grammar->
281 g_ll.ll_label[s->s_lower].lb_type;
282 }
283 else
284 *expected_ret = -1;
285 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000286 return E_SYNTAX;
287 }
288}
289
290
Guido van Rossum408027e1996-12-30 16:17:54 +0000291#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000292
293/* DEBUG OUTPUT */
294
295void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000296dumptree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000297{
298 int i;
299
300 if (n == NULL)
301 printf("NIL");
302 else {
303 label l;
304 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000305 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000306 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000307 if (ISNONTERMINAL(TYPE(n))) {
308 printf("(");
309 for (i = 0; i < NCH(n); i++) {
310 if (i > 0)
311 printf(",");
312 dumptree(g, CHILD(n, i));
313 }
314 printf(")");
315 }
316 }
317}
318
319void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000320showtree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000321{
322 int i;
323
324 if (n == NULL)
325 return;
326 if (ISNONTERMINAL(TYPE(n))) {
327 for (i = 0; i < NCH(n); i++)
328 showtree(g, CHILD(n, i));
329 }
330 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000331 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000332 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
333 printf("(%s)", STR(n));
334 printf(" ");
335 }
336 else
337 printf("? ");
338}
339
340void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000341printtree(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000342{
Guido van Rossum86bea461997-04-29 21:03:06 +0000343 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 printf("Parse tree:\n");
345 dumptree(ps->p_grammar, ps->p_tree);
346 printf("\n");
347 printf("Tokens:\n");
348 showtree(ps->p_grammar, ps->p_tree);
349 printf("\n");
350 }
351 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000352 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000353 printf("\n");
354}
355
Guido van Rossum408027e1996-12-30 16:17:54 +0000356#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000357
358/*
359
360Description
361-----------
362
363The parser's interface is different than usual: the function addtoken()
364must be called for each token in the input. This makes it possible to
365turn it into an incremental parsing system later. The parsing system
366constructs a parse tree as it goes.
367
368A parsing rule is represented as a Deterministic Finite-state Automaton
369(DFA). A node in a DFA represents a state of the parser; an arc represents
370a transition. Transitions are either labeled with terminal symbols or
371with non-terminals. When the parser decides to follow an arc labeled
372with a non-terminal, it is invoked recursively with the DFA representing
373the parsing rule for that as its initial state; when that DFA accepts,
374the parser that invoked it continues. The parse tree constructed by the
375recursively called parser is inserted as a child in the current parse tree.
376
377The DFA's can be constructed automatically from a more conventional
378language description. An extended LL(1) grammar (ELL(1)) is suitable.
379Certain restrictions make the parser's life easier: rules that can produce
380the empty string should be outlawed (there are other ways to put loops
381or optional parts in the language). To avoid the need to construct
382FIRST sets, we can require that all but the last alternative of a rule
383(really: arc going out of a DFA's state) must begin with a terminal
384symbol.
385
386As an example, consider this grammar:
387
388expr: term (OP term)*
389term: CONSTANT | '(' expr ')'
390
391The DFA corresponding to the rule for expr is:
392
393------->.---term-->.------->
394 ^ |
395 | |
396 \----OP----/
397
398The parse tree generated for the input a+b is:
399
400(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
401
402*/