blob: 5017a506695c973511b2e0a7389184c307ba2bb0 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* Parser implementation */
12
13/* For a description, see the comments at end of this file */
14
15/* XXX To do: error recovery */
16
Guido van Rossum3f5da241990-12-20 15:06:42 +000017#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000018#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000019#include "token.h"
20#include "grammar.h"
21#include "node.h"
22#include "parser.h"
23#include "errcode.h"
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025
Guido van Rossum408027e1996-12-30 16:17:54 +000026#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000027extern int Py_DebugFlag;
28#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000029#else
30#define D(x)
31#endif
32
33
34/* STACK DATA TYPE */
35
Tim Petersdbd9ba62000-07-09 03:09:57 +000036static void s_reset(stack *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037
38static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000039s_reset(stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000040{
41 s->s_top = &s->s_base[MAXSTACK];
42}
43
44#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
45
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046static int
Thomas Wouters23c9e002000-07-22 19:20:54 +000047s_push(register stack *s, dfa *d, node *parent)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000048{
49 register stackentry *top;
50 if (s->s_top == s->s_base) {
51 fprintf(stderr, "s_push: parser stack overflow\n");
52 return -1;
53 }
54 top = --s->s_top;
55 top->s_dfa = d;
56 top->s_parent = parent;
57 top->s_state = 0;
58 return 0;
59}
60
Guido van Rossum408027e1996-12-30 16:17:54 +000061#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000062
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000064s_pop(register stack *s)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065{
Guido van Rossum588633d1994-12-30 15:46:02 +000066 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000067 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000068 s->s_top++;
69}
70
Guido van Rossum408027e1996-12-30 16:17:54 +000071#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000072
73#define s_pop(s) (s)->s_top++
74
75#endif
76
77
78/* PARSER CREATION */
79
80parser_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +000081PyParser_New(grammar *g, int start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000082{
83 parser_state *ps;
84
85 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +000086 PyGrammar_AddAccelerators(g);
87 ps = PyMem_NEW(parser_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088 if (ps == NULL)
89 return NULL;
90 ps->p_grammar = g;
Guido van Rossum86bea461997-04-29 21:03:06 +000091 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000092 if (ps->p_tree == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +000093 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094 return NULL;
95 }
96 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +000097 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098 return ps;
99}
100
101void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000102PyParser_Delete(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Guido van Rossum99f02d41990-11-18 17:38:42 +0000104 /* NB If you want to save the parse tree,
105 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +0000106 PyNode_Free(ps->p_tree);
107 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000108}
109
110
111/* PARSER STACK OPERATIONS */
112
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000113static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000114shift(register stack *s, int type, char *str, int newstate, int lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115{
Jeremy Hylton94988062000-06-20 19:10:44 +0000116 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000117 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000118 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno);
119 if (err)
120 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121 s->s_top->s_state = newstate;
122 return 0;
123}
124
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000125static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000126push(register stack *s, int type, dfa *d, int newstate, int lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000127{
Jeremy Hylton94988062000-06-20 19:10:44 +0000128 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000129 register node *n;
130 n = s->s_top->s_parent;
131 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000132 err = PyNode_AddChild(n, type, (char *)NULL, lineno);
133 if (err)
134 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000135 s->s_top->s_state = newstate;
136 return s_push(s, d, CHILD(n, NCH(n)-1));
137}
138
139
140/* PARSER PROPER */
141
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000142static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000143classify(grammar *g, int type, char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000144{
145 register int n = g->g_ll.ll_nlabels;
146
147 if (type == NAME) {
148 register char *s = str;
149 register label *l = g->g_ll.ll_label;
150 register int i;
151 for (i = n; i > 0; i--, l++) {
152 if (l->lb_type == NAME && l->lb_str != NULL &&
153 l->lb_str[0] == s[0] &&
154 strcmp(l->lb_str, s) == 0) {
155 D(printf("It's a keyword\n"));
156 return n - i;
157 }
158 }
159 }
160
161 {
162 register label *l = g->g_ll.ll_label;
163 register int i;
164 for (i = n; i > 0; i--, l++) {
165 if (l->lb_type == type && l->lb_str == NULL) {
166 D(printf("It's a token we know\n"));
167 return n - i;
168 }
169 }
170 }
171
172 D(printf("Illegal token\n"));
173 return -1;
174}
175
176int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000177PyParser_AddToken(register parser_state *ps, register int type, char *str,
178 int lineno, int *expected_ret)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179{
180 register int ilabel;
Jeremy Hylton94988062000-06-20 19:10:44 +0000181 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000182
Guido van Rossum86bea461997-04-29 21:03:06 +0000183 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000184
185 /* Find out which label this token is */
186 ilabel = classify(ps->p_grammar, type, str);
187 if (ilabel < 0)
188 return E_SYNTAX;
189
190 /* Loop until the token is shifted or an error occurred */
191 for (;;) {
192 /* Fetch the current dfa and state */
193 register dfa *d = ps->p_stack.s_top->s_dfa;
194 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
195
196 D(printf(" DFA '%s', state %d:",
197 d->d_name, ps->p_stack.s_top->s_state));
198
199 /* Check accelerator */
200 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
201 register int x = s->s_accel[ilabel - s->s_lower];
202 if (x != -1) {
203 if (x & (1<<7)) {
204 /* Push non-terminal */
205 int nt = (x >> 8) + NT_OFFSET;
206 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000207 dfa *d1 = PyGrammar_FindDFA(
208 ps->p_grammar, nt);
Jeremy Hylton94988062000-06-20 19:10:44 +0000209 if ((err = push(&ps->p_stack, nt, d1,
210 arrow, lineno)) > 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000211 D(printf(" MemError: push\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000212 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 }
214 D(printf(" Push ...\n"));
215 continue;
216 }
217
218 /* Shift the token */
Jeremy Hylton94988062000-06-20 19:10:44 +0000219 if ((err = shift(&ps->p_stack, type, str,
220 x, lineno)) > 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000221 D(printf(" MemError: shift.\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000222 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 }
224 D(printf(" Shift.\n"));
225 /* Pop while we are in an accept-only state */
226 while (s = &d->d_state
227 [ps->p_stack.s_top->s_state],
228 s->s_accept && s->s_narcs == 1) {
229 D(printf(" Direct pop.\n"));
230 s_pop(&ps->p_stack);
231 if (s_empty(&ps->p_stack)) {
232 D(printf(" ACCEPT.\n"));
233 return E_DONE;
234 }
235 d = ps->p_stack.s_top->s_dfa;
236 }
237 return E_OK;
238 }
239 }
240
241 if (s->s_accept) {
242 /* Pop this dfa and try again */
243 s_pop(&ps->p_stack);
244 D(printf(" Pop ...\n"));
245 if (s_empty(&ps->p_stack)) {
246 D(printf(" Error: bottom of stack.\n"));
247 return E_SYNTAX;
248 }
249 continue;
250 }
251
252 /* Stuck, report syntax error */
253 D(printf(" Error.\n"));
Fred Drake85f36392000-07-11 17:53:00 +0000254 if (expected_ret) {
255 if (s->s_lower == s->s_upper - 1) {
256 /* Only one possible expected token */
257 *expected_ret = ps->p_grammar->
258 g_ll.ll_label[s->s_lower].lb_type;
259 }
260 else
261 *expected_ret = -1;
262 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000263 return E_SYNTAX;
264 }
265}
266
267
Guido van Rossum408027e1996-12-30 16:17:54 +0000268#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000269
270/* DEBUG OUTPUT */
271
272void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000273dumptree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000274{
275 int i;
276
277 if (n == NULL)
278 printf("NIL");
279 else {
280 label l;
281 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000282 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000283 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000284 if (ISNONTERMINAL(TYPE(n))) {
285 printf("(");
286 for (i = 0; i < NCH(n); i++) {
287 if (i > 0)
288 printf(",");
289 dumptree(g, CHILD(n, i));
290 }
291 printf(")");
292 }
293 }
294}
295
296void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000297showtree(grammar *g, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298{
299 int i;
300
301 if (n == NULL)
302 return;
303 if (ISNONTERMINAL(TYPE(n))) {
304 for (i = 0; i < NCH(n); i++)
305 showtree(g, CHILD(n, i));
306 }
307 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000308 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000309 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
310 printf("(%s)", STR(n));
311 printf(" ");
312 }
313 else
314 printf("? ");
315}
316
317void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000318printtree(parser_state *ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000319{
Guido van Rossum86bea461997-04-29 21:03:06 +0000320 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000321 printf("Parse tree:\n");
322 dumptree(ps->p_grammar, ps->p_tree);
323 printf("\n");
324 printf("Tokens:\n");
325 showtree(ps->p_grammar, ps->p_tree);
326 printf("\n");
327 }
328 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000329 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330 printf("\n");
331}
332
Guido van Rossum408027e1996-12-30 16:17:54 +0000333#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334
335/*
336
337Description
338-----------
339
340The parser's interface is different than usual: the function addtoken()
341must be called for each token in the input. This makes it possible to
342turn it into an incremental parsing system later. The parsing system
343constructs a parse tree as it goes.
344
345A parsing rule is represented as a Deterministic Finite-state Automaton
346(DFA). A node in a DFA represents a state of the parser; an arc represents
347a transition. Transitions are either labeled with terminal symbols or
348with non-terminals. When the parser decides to follow an arc labeled
349with a non-terminal, it is invoked recursively with the DFA representing
350the parsing rule for that as its initial state; when that DFA accepts,
351the parser that invoked it continues. The parse tree constructed by the
352recursively called parser is inserted as a child in the current parse tree.
353
354The DFA's can be constructed automatically from a more conventional
355language description. An extended LL(1) grammar (ELL(1)) is suitable.
356Certain restrictions make the parser's life easier: rules that can produce
357the empty string should be outlawed (there are other ways to put loops
358or optional parts in the language). To avoid the need to construct
359FIRST sets, we can require that all but the last alternative of a rule
360(really: arc going out of a DFA's state) must begin with a terminal
361symbol.
362
363As an example, consider this grammar:
364
365expr: term (OP term)*
366term: CONSTANT | '(' expr ')'
367
368The DFA corresponding to the rule for expr is:
369
370------->.---term-->.------->
371 ^ |
372 | |
373 \----OP----/
374
375The parse tree generated for the input a+b is:
376
377(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
378
379*/