blob: 3b75dbc3f4d2b9955f16d9b418bbd82c83ba94e0 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumb9f8d6e1995-01-04 19:08:09 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* Parser implementation */
33
34/* For a description, see the comments at end of this file */
35
36/* XXX To do: error recovery */
37
Guido van Rossum3f5da241990-12-20 15:06:42 +000038#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000040#include "token.h"
41#include "grammar.h"
42#include "node.h"
43#include "parser.h"
44#include "errcode.h"
45
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046
Guido van Rossum408027e1996-12-30 16:17:54 +000047#ifdef Py_DEBUG
Guido van Rossum86bea461997-04-29 21:03:06 +000048extern int Py_DebugFlag;
49#define D(x) if (!Py_DebugFlag); else x
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000050#else
51#define D(x)
52#endif
53
54
55/* STACK DATA TYPE */
56
Guido van Rossum86bea461997-04-29 21:03:06 +000057static void s_reset Py_PROTO((stack *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000058
59static void
60s_reset(s)
61 stack *s;
62{
63 s->s_top = &s->s_base[MAXSTACK];
64}
65
66#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
67
Guido van Rossum86bea461997-04-29 21:03:06 +000068static int s_push Py_PROTO((stack *, dfa *, node *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000069
70static int
71s_push(s, d, parent)
72 register stack *s;
73 dfa *d;
74 node *parent;
75{
76 register stackentry *top;
77 if (s->s_top == s->s_base) {
78 fprintf(stderr, "s_push: parser stack overflow\n");
79 return -1;
80 }
81 top = --s->s_top;
82 top->s_dfa = d;
83 top->s_parent = parent;
84 top->s_state = 0;
85 return 0;
86}
87
Guido van Rossum408027e1996-12-30 16:17:54 +000088#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000089
Guido van Rossum86bea461997-04-29 21:03:06 +000090static void s_pop Py_PROTO((stack *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091
92static void
93s_pop(s)
94 register stack *s;
95{
Guido van Rossum588633d1994-12-30 15:46:02 +000096 if (s_empty(s))
Guido van Rossum86bea461997-04-29 21:03:06 +000097 Py_FatalError("s_pop: parser stack underflow -- FATAL");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098 s->s_top++;
99}
100
Guido van Rossum408027e1996-12-30 16:17:54 +0000101#else /* !Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102
103#define s_pop(s) (s)->s_top++
104
105#endif
106
107
108/* PARSER CREATION */
109
110parser_state *
Guido van Rossum86bea461997-04-29 21:03:06 +0000111PyParser_New(g, start)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112 grammar *g;
113 int start;
114{
115 parser_state *ps;
116
117 if (!g->g_accel)
Guido van Rossum86bea461997-04-29 21:03:06 +0000118 PyGrammar_AddAccelerators(g);
119 ps = PyMem_NEW(parser_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120 if (ps == NULL)
121 return NULL;
122 ps->p_grammar = g;
Guido van Rossum86bea461997-04-29 21:03:06 +0000123 ps->p_tree = PyNode_New(start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000124 if (ps->p_tree == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000125 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126 return NULL;
127 }
128 s_reset(&ps->p_stack);
Guido van Rossum86bea461997-04-29 21:03:06 +0000129 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000130 return ps;
131}
132
133void
Guido van Rossum86bea461997-04-29 21:03:06 +0000134PyParser_Delete(ps)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000135 parser_state *ps;
136{
Guido van Rossum99f02d41990-11-18 17:38:42 +0000137 /* NB If you want to save the parse tree,
138 you must set p_tree to NULL before calling delparser! */
Guido van Rossum86bea461997-04-29 21:03:06 +0000139 PyNode_Free(ps->p_tree);
140 PyMem_DEL(ps);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000141}
142
143
144/* PARSER STACK OPERATIONS */
145
Guido van Rossum86bea461997-04-29 21:03:06 +0000146static int shift Py_PROTO((stack *, int, char *, int, int));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000147
148static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000149shift(s, type, str, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150 register stack *s;
151 int type;
152 char *str;
153 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000154 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000155{
156 assert(!s_empty(s));
Guido van Rossum86bea461997-04-29 21:03:06 +0000157 if (PyNode_AddChild(s->s_top->s_parent, type, str, lineno) == NULL) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000158 fprintf(stderr, "shift: no mem in addchild\n");
159 return -1;
160 }
161 s->s_top->s_state = newstate;
162 return 0;
163}
164
Guido van Rossum86bea461997-04-29 21:03:06 +0000165static int push Py_PROTO((stack *, int, dfa *, int, int));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000166
167static int
Guido van Rossum3f5da241990-12-20 15:06:42 +0000168push(s, type, d, newstate, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169 register stack *s;
170 int type;
171 dfa *d;
172 int newstate;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000173 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000174{
175 register node *n;
176 n = s->s_top->s_parent;
177 assert(!s_empty(s));
Guido van Rossum86bea461997-04-29 21:03:06 +0000178 if (PyNode_AddChild(n, type, (char *)NULL, lineno) == NULL) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179 fprintf(stderr, "push: no mem in addchild\n");
180 return -1;
181 }
182 s->s_top->s_state = newstate;
183 return s_push(s, d, CHILD(n, NCH(n)-1));
184}
185
186
187/* PARSER PROPER */
188
Guido van Rossum86bea461997-04-29 21:03:06 +0000189static int classify Py_PROTO((grammar *, int, char *));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000190
191static int
192classify(g, type, str)
193 grammar *g;
194 register int type;
195 char *str;
196{
197 register int n = g->g_ll.ll_nlabels;
198
199 if (type == NAME) {
200 register char *s = str;
201 register label *l = g->g_ll.ll_label;
202 register int i;
203 for (i = n; i > 0; i--, l++) {
204 if (l->lb_type == NAME && l->lb_str != NULL &&
205 l->lb_str[0] == s[0] &&
206 strcmp(l->lb_str, s) == 0) {
207 D(printf("It's a keyword\n"));
208 return n - i;
209 }
210 }
211 }
212
213 {
214 register label *l = g->g_ll.ll_label;
215 register int i;
216 for (i = n; i > 0; i--, l++) {
217 if (l->lb_type == type && l->lb_str == NULL) {
218 D(printf("It's a token we know\n"));
219 return n - i;
220 }
221 }
222 }
223
224 D(printf("Illegal token\n"));
225 return -1;
226}
227
228int
Guido van Rossum86bea461997-04-29 21:03:06 +0000229PyParser_AddToken(ps, type, str, lineno)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000230 register parser_state *ps;
231 register int type;
232 char *str;
Guido van Rossum3f5da241990-12-20 15:06:42 +0000233 int lineno;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234{
235 register int ilabel;
236
Guido van Rossum86bea461997-04-29 21:03:06 +0000237 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000238
239 /* Find out which label this token is */
240 ilabel = classify(ps->p_grammar, type, str);
241 if (ilabel < 0)
242 return E_SYNTAX;
243
244 /* Loop until the token is shifted or an error occurred */
245 for (;;) {
246 /* Fetch the current dfa and state */
247 register dfa *d = ps->p_stack.s_top->s_dfa;
248 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
249
250 D(printf(" DFA '%s', state %d:",
251 d->d_name, ps->p_stack.s_top->s_state));
252
253 /* Check accelerator */
254 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
255 register int x = s->s_accel[ilabel - s->s_lower];
256 if (x != -1) {
257 if (x & (1<<7)) {
258 /* Push non-terminal */
259 int nt = (x >> 8) + NT_OFFSET;
260 int arrow = x & ((1<<7)-1);
Guido van Rossum86bea461997-04-29 21:03:06 +0000261 dfa *d1 = PyGrammar_FindDFA(
262 ps->p_grammar, nt);
Guido van Rossum3f5da241990-12-20 15:06:42 +0000263 if (push(&ps->p_stack, nt, d1,
264 arrow, lineno) < 0) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000265 D(printf(" MemError: push\n"));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000266 return E_NOMEM;
267 }
268 D(printf(" Push ...\n"));
269 continue;
270 }
271
272 /* Shift the token */
Guido van Rossum3f5da241990-12-20 15:06:42 +0000273 if (shift(&ps->p_stack, type, str,
274 x, lineno) < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000275 D(printf(" MemError: shift.\n"));
276 return E_NOMEM;
277 }
278 D(printf(" Shift.\n"));
279 /* Pop while we are in an accept-only state */
280 while (s = &d->d_state
281 [ps->p_stack.s_top->s_state],
282 s->s_accept && s->s_narcs == 1) {
283 D(printf(" Direct pop.\n"));
284 s_pop(&ps->p_stack);
285 if (s_empty(&ps->p_stack)) {
286 D(printf(" ACCEPT.\n"));
287 return E_DONE;
288 }
289 d = ps->p_stack.s_top->s_dfa;
290 }
291 return E_OK;
292 }
293 }
294
295 if (s->s_accept) {
296 /* Pop this dfa and try again */
297 s_pop(&ps->p_stack);
298 D(printf(" Pop ...\n"));
299 if (s_empty(&ps->p_stack)) {
300 D(printf(" Error: bottom of stack.\n"));
301 return E_SYNTAX;
302 }
303 continue;
304 }
305
306 /* Stuck, report syntax error */
307 D(printf(" Error.\n"));
308 return E_SYNTAX;
309 }
310}
311
312
Guido van Rossum408027e1996-12-30 16:17:54 +0000313#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314
315/* DEBUG OUTPUT */
316
317void
318dumptree(g, n)
319 grammar *g;
320 node *n;
321{
322 int i;
323
324 if (n == NULL)
325 printf("NIL");
326 else {
327 label l;
328 l.lb_type = TYPE(n);
Guido van Rossumcf7448b1992-09-03 20:46:37 +0000329 l.lb_str = STR(n);
Guido van Rossum86bea461997-04-29 21:03:06 +0000330 printf("%s", PyGrammar_LabelRepr(&l));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000331 if (ISNONTERMINAL(TYPE(n))) {
332 printf("(");
333 for (i = 0; i < NCH(n); i++) {
334 if (i > 0)
335 printf(",");
336 dumptree(g, CHILD(n, i));
337 }
338 printf(")");
339 }
340 }
341}
342
343void
344showtree(g, n)
345 grammar *g;
346 node *n;
347{
348 int i;
349
350 if (n == NULL)
351 return;
352 if (ISNONTERMINAL(TYPE(n))) {
353 for (i = 0; i < NCH(n); i++)
354 showtree(g, CHILD(n, i));
355 }
356 else if (ISTERMINAL(TYPE(n))) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000357 printf("%s", _PyParser_TokenNames[TYPE(n)]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000358 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
359 printf("(%s)", STR(n));
360 printf(" ");
361 }
362 else
363 printf("? ");
364}
365
366void
367printtree(ps)
368 parser_state *ps;
369{
Guido van Rossum86bea461997-04-29 21:03:06 +0000370 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000371 printf("Parse tree:\n");
372 dumptree(ps->p_grammar, ps->p_tree);
373 printf("\n");
374 printf("Tokens:\n");
375 showtree(ps->p_grammar, ps->p_tree);
376 printf("\n");
377 }
378 printf("Listing:\n");
Guido van Rossum86bea461997-04-29 21:03:06 +0000379 PyNode_ListTree(ps->p_tree);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000380 printf("\n");
381}
382
Guido van Rossum408027e1996-12-30 16:17:54 +0000383#endif /* Py_DEBUG */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000384
385/*
386
387Description
388-----------
389
390The parser's interface is different than usual: the function addtoken()
391must be called for each token in the input. This makes it possible to
392turn it into an incremental parsing system later. The parsing system
393constructs a parse tree as it goes.
394
395A parsing rule is represented as a Deterministic Finite-state Automaton
396(DFA). A node in a DFA represents a state of the parser; an arc represents
397a transition. Transitions are either labeled with terminal symbols or
398with non-terminals. When the parser decides to follow an arc labeled
399with a non-terminal, it is invoked recursively with the DFA representing
400the parsing rule for that as its initial state; when that DFA accepts,
401the parser that invoked it continues. The parse tree constructed by the
402recursively called parser is inserted as a child in the current parse tree.
403
404The DFA's can be constructed automatically from a more conventional
405language description. An extended LL(1) grammar (ELL(1)) is suitable.
406Certain restrictions make the parser's life easier: rules that can produce
407the empty string should be outlawed (there are other ways to put loops
408or optional parts in the language). To avoid the need to construct
409FIRST sets, we can require that all but the last alternative of a rule
410(really: arc going out of a DFA's state) must begin with a terminal
411symbol.
412
413As an example, consider this grammar:
414
415expr: term (OP term)*
416term: CONSTANT | '(' expr ')'
417
418The DFA corresponding to the rule for expr is:
419
420------->.---term-->.------->
421 ^ |
422 | |
423 \----OP----/
424
425The parse tree generated for the input a+b is:
426
427(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
428
429*/