blob: 00299afe70c72be24dd4294c84e4f4124f86a3b0 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumb9f8d6e1995-01-04 19:08:09 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* Parser implementation */
33
34/* For a description, see the comments at end of this file */
35
36/* XXX To do: error recovery */
37
Guido van Rossum3f5da241990-12-20 15:06:42 +000038#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000040#include "token.h"
41#include "grammar.h"
42#include "node.h"
43#include "parser.h"
44#include "errcode.h"
45
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046
Guido van Rossum408027e1996-12-30 16:17:54 +000047#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000048extern int Py_DebugFlag;
49#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000050#else
51#define D(x)
52#endif
53
54
55/* STACK DATA TYPE */
56
Guido van Rossum86bea461997-04-29 21:03:06 +000057static void s_reset Py_PROTO((stack *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000058
59static void
60s_reset(s)
61 stack *s;
62{
63 s->s_top = &s->s_base[MAXSTACK];
64}
65
66#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
67
Guido van Rossum86bea461997-04-29 21:03:06 +000068static int s_push Py_PROTO((stack *, dfa *, node *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000069
70static int
71s_push(s, d, parent)
72 register stack *s;
73 dfa *d;
74 node *parent;
75{
76 register stackentry *top;
77 if (s->s_top == s->s_base) {
78 fprintf(stderr, "s_push: parser stack overflow\n");
79 return -1;
80 }
81 top = --s->s_top;
82 top->s_dfa = d;
83 top->s_parent = parent;
84 top->s_state = 0;
85 return 0;
86}
87
Guido van Rossum408027e1996-12-30 16:17:54 +000088#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000089
Guido van Rossum86bea461997-04-29 21:03:06 +000090static void s_pop Py_PROTO((stack *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091
92static void
93s_pop(s)
94 register stack *s;
95{
Guido van Rossum588633d1994-12-30 15:46:02 +000096 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000097 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098 s->s_top++;
99}
100
Guido van Rossum408027e1996-12-30 16:17:54 +0000101#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102
103#define s_pop(s) (s)->s_top++
104
105#endif
106
107
108/* PARSER CREATION */
109
110parser_state *
Guido van Rossum86bea461997-04-29 21:03:06 +0000111PyParser_New(g, start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112 grammar *g;
113 int start;
114{
115 parser_state *ps;
116
117 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +0000118 PyGrammar_AddAccelerators(g);
119 ps = PyMem_NEW(parser_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120 if (ps == NULL)
121 return NULL;
122 ps->p_grammar = g;
Guido van Rossum86bea461997-04-29 21:03:06 +0000123 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000124 if (ps->p_tree == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000125 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126 return NULL;
127 }
128 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +0000129 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000130 return ps;
131}
132
133void
Guido van Rossum86bea461997-04-29 21:03:06 +0000134PyParser_Delete(ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000135 parser_state *ps;
136{
Guido van Rossum99f02d41990-11-18 17:38:42 +0000137 /* NB If you want to save the parse tree,
138 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +0000139 PyNode_Free(ps->p_tree);
140 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000141}
142
143
144/* PARSER STACK OPERATIONS */
145
Guido van Rossum86bea461997-04-29 21:03:06 +0000146static int shift Py_PROTO((stack *, int, char *, int, int));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000147
148static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000149shift(s, type, str, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150 register stack *s;
151 int type;
152 char *str;
153 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000154 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000155{
Jeremy Hylton94988062000-06-20 19:10:44 +0000156 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000157 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000158 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno);
159 if (err)
160 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000161 s->s_top->s_state = newstate;
162 return 0;
163}
164
Guido van Rossum86bea461997-04-29 21:03:06 +0000165static int push Py_PROTO((stack *, int, dfa *, int, int));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000166
167static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000168push(s, type, d, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169 register stack *s;
170 int type;
171 dfa *d;
172 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000173 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000174{
Jeremy Hylton94988062000-06-20 19:10:44 +0000175 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000176 register node *n;
177 n = s->s_top->s_parent;
178 assert(!s_empty(s));
Jeremy Hylton94988062000-06-20 19:10:44 +0000179 err = PyNode_AddChild(n, type, (char *)NULL, lineno);
180 if (err)
181 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000182 s->s_top->s_state = newstate;
183 return s_push(s, d, CHILD(n, NCH(n)-1));
184}
185
186
187/* PARSER PROPER */
188
Guido van Rossum86bea461997-04-29 21:03:06 +0000189static int classify Py_PROTO((grammar *, int, char *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000190
191static int
192classify(g, type, str)
193 grammar *g;
194 register int type;
195 char *str;
196{
197 register int n = g->g_ll.ll_nlabels;
198
199 if (type == NAME) {
200 register char *s = str;
201 register label *l = g->g_ll.ll_label;
202 register int i;
203 for (i = n; i > 0; i--, l++) {
204 if (l->lb_type == NAME && l->lb_str != NULL &&
205 l->lb_str[0] == s[0] &&
206 strcmp(l->lb_str, s) == 0) {
207 D(printf("It's a keyword\n"));
208 return n - i;
209 }
210 }
211 }
212
213 {
214 register label *l = g->g_ll.ll_label;
215 register int i;
216 for (i = n; i > 0; i--, l++) {
217 if (l->lb_type == type && l->lb_str == NULL) {
218 D(printf("It's a token we know\n"));
219 return n - i;
220 }
221 }
222 }
223
224 D(printf("Illegal token\n"));
225 return -1;
226}
227
228int
Guido van Rossum86bea461997-04-29 21:03:06 +0000229PyParser_AddToken(ps, type, str, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000230 register parser_state *ps;
231 register int type;
232 char *str;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000233 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234{
235 register int ilabel;
Jeremy Hylton94988062000-06-20 19:10:44 +0000236 int err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000237
Guido van Rossum86bea461997-04-29 21:03:06 +0000238 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239
240 /* Find out which label this token is */
241 ilabel = classify(ps->p_grammar, type, str);
242 if (ilabel < 0)
243 return E_SYNTAX;
244
245 /* Loop until the token is shifted or an error occurred */
246 for (;;) {
247 /* Fetch the current dfa and state */
248 register dfa *d = ps->p_stack.s_top->s_dfa;
249 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
250
251 D(printf(" DFA '%s', state %d:",
252 d->d_name, ps->p_stack.s_top->s_state));
253
254 /* Check accelerator */
255 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
256 register int x = s->s_accel[ilabel - s->s_lower];
257 if (x != -1) {
258 if (x & (1<<7)) {
259 /* Push non-terminal */
260 int nt = (x >> 8) + NT_OFFSET;
261 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000262 dfa *d1 = PyGrammar_FindDFA(
263 ps->p_grammar, nt);
Jeremy Hylton94988062000-06-20 19:10:44 +0000264 if ((err = push(&ps->p_stack, nt, d1,
265 arrow, lineno)) > 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000266 D(printf(" MemError: push\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000267 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000268 }
269 D(printf(" Push ...\n"));
270 continue;
271 }
272
273 /* Shift the token */
Jeremy Hylton94988062000-06-20 19:10:44 +0000274 if ((err = shift(&ps->p_stack, type, str,
275 x, lineno)) > 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276 D(printf(" MemError: shift.\n"));
Jeremy Hylton94988062000-06-20 19:10:44 +0000277 return err;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000278 }
279 D(printf(" Shift.\n"));
280 /* Pop while we are in an accept-only state */
281 while (s = &d->d_state
282 [ps->p_stack.s_top->s_state],
283 s->s_accept && s->s_narcs == 1) {
284 D(printf(" Direct pop.\n"));
285 s_pop(&ps->p_stack);
286 if (s_empty(&ps->p_stack)) {
287 D(printf(" ACCEPT.\n"));
288 return E_DONE;
289 }
290 d = ps->p_stack.s_top->s_dfa;
291 }
292 return E_OK;
293 }
294 }
295
296 if (s->s_accept) {
297 /* Pop this dfa and try again */
298 s_pop(&ps->p_stack);
299 D(printf(" Pop ...\n"));
300 if (s_empty(&ps->p_stack)) {
301 D(printf(" Error: bottom of stack.\n"));
302 return E_SYNTAX;
303 }
304 continue;
305 }
306
307 /* Stuck, report syntax error */
308 D(printf(" Error.\n"));
309 return E_SYNTAX;
310 }
311}
312
313
Guido van Rossum408027e1996-12-30 16:17:54 +0000314#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000315
316/* DEBUG OUTPUT */
317
318void
319dumptree(g, n)
320 grammar *g;
321 node *n;
322{
323 int i;
324
325 if (n == NULL)
326 printf("NIL");
327 else {
328 label l;
329 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000330 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000331 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000332 if (ISNONTERMINAL(TYPE(n))) {
333 printf("(");
334 for (i = 0; i < NCH(n); i++) {
335 if (i > 0)
336 printf(",");
337 dumptree(g, CHILD(n, i));
338 }
339 printf(")");
340 }
341 }
342}
343
344void
345showtree(g, n)
346 grammar *g;
347 node *n;
348{
349 int i;
350
351 if (n == NULL)
352 return;
353 if (ISNONTERMINAL(TYPE(n))) {
354 for (i = 0; i < NCH(n); i++)
355 showtree(g, CHILD(n, i));
356 }
357 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000358 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
360 printf("(%s)", STR(n));
361 printf(" ");
362 }
363 else
364 printf("? ");
365}
366
367void
368printtree(ps)
369 parser_state *ps;
370{
Guido van Rossum86bea461997-04-29 21:03:06 +0000371 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000372 printf("Parse tree:\n");
373 dumptree(ps->p_grammar, ps->p_tree);
374 printf("\n");
375 printf("Tokens:\n");
376 showtree(ps->p_grammar, ps->p_tree);
377 printf("\n");
378 }
379 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000380 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000381 printf("\n");
382}
383
Guido van Rossum408027e1996-12-30 16:17:54 +0000384#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000385
386/*
387
388Description
389-----------
390
391The parser's interface is different than usual: the function addtoken()
392must be called for each token in the input. This makes it possible to
393turn it into an incremental parsing system later. The parsing system
394constructs a parse tree as it goes.
395
396A parsing rule is represented as a Deterministic Finite-state Automaton
397(DFA). A node in a DFA represents a state of the parser; an arc represents
398a transition. Transitions are either labeled with terminal symbols or
399with non-terminals. When the parser decides to follow an arc labeled
400with a non-terminal, it is invoked recursively with the DFA representing
401the parsing rule for that as its initial state; when that DFA accepts,
402the parser that invoked it continues. The parse tree constructed by the
403recursively called parser is inserted as a child in the current parse tree.
404
405The DFA's can be constructed automatically from a more conventional
406language description. An extended LL(1) grammar (ELL(1)) is suitable.
407Certain restrictions make the parser's life easier: rules that can produce
408the empty string should be outlawed (there are other ways to put loops
409or optional parts in the language). To avoid the need to construct
410FIRST sets, we can require that all but the last alternative of a rule
411(really: arc going out of a DFA's state) must begin with a terminal
412symbol.
413
414As an example, consider this grammar:
415
416expr: term (OP term)*
417term: CONSTANT | '(' expr ')'
418
419The DFA corresponding to the rule for expr is:
420
421------->.---term-->.------->
422 ^ |
423 | |
424 \----OP----/
425
426The parse tree generated for the input a+b is:
427
428(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
429
430*/