blob: 4b2acb87f54569a33c85788ae2a093ed9e8a497f [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumb9f8d6e1995-01-04 19:08:09 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* Parser generator */
Guido van Rossum3f5da241990-12-20 15:06:42 +000033/* XXX This file is not yet fully PROTOized */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000034
35/* For a description, see the comments at end of this file */
36
Guido van Rossum3f5da241990-12-20 15:06:42 +000037#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039#include "token.h"
40#include "node.h"
41#include "grammar.h"
42#include "metagrammar.h"
43#include "pgen.h"
44
Guido van Rossum86bea461997-04-29 21:03:06 +000045extern int Py_DebugFlag;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046
47
48/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
49
50typedef struct _nfaarc {
51 int ar_label;
52 int ar_arrow;
53} nfaarc;
54
55typedef struct _nfastate {
56 int st_narcs;
57 nfaarc *st_arc;
58} nfastate;
59
60typedef struct _nfa {
61 int nf_type;
62 char *nf_name;
63 int nf_nstates;
64 nfastate *nf_state;
65 int nf_start, nf_finish;
66} nfa;
67
Guido van Rossum71f477c1991-04-03 19:09:02 +000068/* Forward */
Guido van Rossum86bea461997-04-29 21:03:06 +000069static void compile_rhs Py_PROTO((labellist *ll,
Guido van Rossumfd8a3931996-12-02 18:27:33 +000070 nfa *nf, node *n, int *pa, int *pb));
Guido van Rossum86bea461997-04-29 21:03:06 +000071static void compile_alt Py_PROTO((labellist *ll,
Guido van Rossumfd8a3931996-12-02 18:27:33 +000072 nfa *nf, node *n, int *pa, int *pb));
Guido van Rossum86bea461997-04-29 21:03:06 +000073static void compile_item Py_PROTO((labellist *ll,
Guido van Rossumfd8a3931996-12-02 18:27:33 +000074 nfa *nf, node *n, int *pa, int *pb));
Guido van Rossum86bea461997-04-29 21:03:06 +000075static void compile_atom Py_PROTO((labellist *ll,
Guido van Rossumfd8a3931996-12-02 18:27:33 +000076 nfa *nf, node *n, int *pa, int *pb));
Guido van Rossum71f477c1991-04-03 19:09:02 +000077
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000078static int
79addnfastate(nf)
80 nfa *nf;
81{
82 nfastate *st;
83
Guido van Rossum86bea461997-04-29 21:03:06 +000084 PyMem_RESIZE(nf->nf_state, nfastate, nf->nf_nstates + 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000085 if (nf->nf_state == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +000086 Py_FatalError("out of mem");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000087 st = &nf->nf_state[nf->nf_nstates++];
88 st->st_narcs = 0;
89 st->st_arc = NULL;
90 return st - nf->nf_state;
91}
92
93static void
94addnfaarc(nf, from, to, lbl)
95 nfa *nf;
96 int from, to, lbl;
97{
98 nfastate *st;
99 nfaarc *ar;
100
101 st = &nf->nf_state[from];
Guido van Rossum86bea461997-04-29 21:03:06 +0000102 PyMem_RESIZE(st->st_arc, nfaarc, st->st_narcs + 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103 if (st->st_arc == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000104 Py_FatalError("out of mem");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105 ar = &st->st_arc[st->st_narcs++];
106 ar->ar_label = lbl;
107 ar->ar_arrow = to;
108}
109
110static nfa *
111newnfa(name)
112 char *name;
113{
114 nfa *nf;
115 static type = NT_OFFSET; /* All types will be disjunct */
116
Guido van Rossum86bea461997-04-29 21:03:06 +0000117 nf = PyMem_NEW(nfa, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000118 if (nf == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000119 Py_FatalError("no mem for new nfa");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120 nf->nf_type = type++;
121 nf->nf_name = name; /* XXX strdup(name) ??? */
122 nf->nf_nstates = 0;
123 nf->nf_state = NULL;
124 nf->nf_start = nf->nf_finish = -1;
125 return nf;
126}
127
128typedef struct _nfagrammar {
129 int gr_nnfas;
130 nfa **gr_nfa;
131 labellist gr_ll;
132} nfagrammar;
133
Guido van Rossum71f477c1991-04-03 19:09:02 +0000134/* Forward */
Guido van Rossum86bea461997-04-29 21:03:06 +0000135static void compile_rule Py_PROTO((nfagrammar *gr, node *n));
Guido van Rossum71f477c1991-04-03 19:09:02 +0000136
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000137static nfagrammar *
138newnfagrammar()
139{
140 nfagrammar *gr;
141
Guido van Rossum86bea461997-04-29 21:03:06 +0000142 gr = PyMem_NEW(nfagrammar, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000143 if (gr == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000144 Py_FatalError("no mem for new nfa grammar");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145 gr->gr_nnfas = 0;
146 gr->gr_nfa = NULL;
147 gr->gr_ll.ll_nlabels = 0;
148 gr->gr_ll.ll_label = NULL;
149 addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
150 return gr;
151}
152
153static nfa *
154addnfa(gr, name)
155 nfagrammar *gr;
156 char *name;
157{
158 nfa *nf;
159
160 nf = newnfa(name);
Guido van Rossum86bea461997-04-29 21:03:06 +0000161 PyMem_RESIZE(gr->gr_nfa, nfa *, gr->gr_nnfas + 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000162 if (gr->gr_nfa == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000163 Py_FatalError("out of mem");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000164 gr->gr_nfa[gr->gr_nnfas++] = nf;
165 addlabel(&gr->gr_ll, NAME, nf->nf_name);
166 return nf;
167}
168
Guido van Rossum408027e1996-12-30 16:17:54 +0000169#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000170
171static char REQNFMT[] = "metacompile: less than %d children\n";
172
173#define REQN(i, count) \
174 if (i < count) { \
175 fprintf(stderr, REQNFMT, count); \
Guido van Rossum86bea461997-04-29 21:03:06 +0000176 Py_FatalError("REQN"); \
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000177 } else
178
179#else
180#define REQN(i, count) /* empty */
181#endif
182
183static nfagrammar *
184metacompile(n)
185 node *n;
186{
187 nfagrammar *gr;
188 int i;
189
190 printf("Compiling (meta-) parse tree into NFA grammar\n");
191 gr = newnfagrammar();
192 REQ(n, MSTART);
193 i = n->n_nchildren - 1; /* Last child is ENDMARKER */
194 n = n->n_child;
195 for (; --i >= 0; n++) {
196 if (n->n_type != NEWLINE)
197 compile_rule(gr, n);
198 }
199 return gr;
200}
201
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000202static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000203compile_rule(gr, n)
204 nfagrammar *gr;
205 node *n;
206{
207 nfa *nf;
208
209 REQ(n, RULE);
210 REQN(n->n_nchildren, 4);
211 n = n->n_child;
212 REQ(n, NAME);
213 nf = addnfa(gr, n->n_str);
214 n++;
215 REQ(n, COLON);
216 n++;
217 REQ(n, RHS);
218 compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
219 n++;
220 REQ(n, NEWLINE);
221}
222
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000223static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000224compile_rhs(ll, nf, n, pa, pb)
225 labellist *ll;
226 nfa *nf;
227 node *n;
228 int *pa, *pb;
229{
230 int i;
231 int a, b;
232
233 REQ(n, RHS);
234 i = n->n_nchildren;
235 REQN(i, 1);
236 n = n->n_child;
237 REQ(n, ALT);
238 compile_alt(ll, nf, n, pa, pb);
239 if (--i <= 0)
240 return;
241 n++;
242 a = *pa;
243 b = *pb;
244 *pa = addnfastate(nf);
245 *pb = addnfastate(nf);
246 addnfaarc(nf, *pa, a, EMPTY);
247 addnfaarc(nf, b, *pb, EMPTY);
248 for (; --i >= 0; n++) {
249 REQ(n, VBAR);
250 REQN(i, 1);
251 --i;
252 n++;
253 REQ(n, ALT);
254 compile_alt(ll, nf, n, &a, &b);
255 addnfaarc(nf, *pa, a, EMPTY);
256 addnfaarc(nf, b, *pb, EMPTY);
257 }
258}
259
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000260static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000261compile_alt(ll, nf, n, pa, pb)
262 labellist *ll;
263 nfa *nf;
264 node *n;
265 int *pa, *pb;
266{
267 int i;
268 int a, b;
269
270 REQ(n, ALT);
271 i = n->n_nchildren;
272 REQN(i, 1);
273 n = n->n_child;
274 REQ(n, ITEM);
275 compile_item(ll, nf, n, pa, pb);
276 --i;
277 n++;
278 for (; --i >= 0; n++) {
279 if (n->n_type == COMMA) { /* XXX Temporary */
280 REQN(i, 1);
281 --i;
282 n++;
283 }
284 REQ(n, ITEM);
285 compile_item(ll, nf, n, &a, &b);
286 addnfaarc(nf, *pb, a, EMPTY);
287 *pb = b;
288 }
289}
290
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000291static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000292compile_item(ll, nf, n, pa, pb)
293 labellist *ll;
294 nfa *nf;
295 node *n;
296 int *pa, *pb;
297{
298 int i;
299 int a, b;
300
301 REQ(n, ITEM);
302 i = n->n_nchildren;
303 REQN(i, 1);
304 n = n->n_child;
305 if (n->n_type == LSQB) {
306 REQN(i, 3);
307 n++;
308 REQ(n, RHS);
309 *pa = addnfastate(nf);
310 *pb = addnfastate(nf);
311 addnfaarc(nf, *pa, *pb, EMPTY);
312 compile_rhs(ll, nf, n, &a, &b);
313 addnfaarc(nf, *pa, a, EMPTY);
314 addnfaarc(nf, b, *pb, EMPTY);
315 REQN(i, 1);
316 n++;
317 REQ(n, RSQB);
318 }
319 else {
320 compile_atom(ll, nf, n, pa, pb);
321 if (--i <= 0)
322 return;
323 n++;
324 addnfaarc(nf, *pb, *pa, EMPTY);
325 if (n->n_type == STAR)
326 *pb = *pa;
327 else
328 REQ(n, PLUS);
329 }
330}
331
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000332static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000333compile_atom(ll, nf, n, pa, pb)
334 labellist *ll;
335 nfa *nf;
336 node *n;
337 int *pa, *pb;
338{
339 int i;
340
341 REQ(n, ATOM);
342 i = n->n_nchildren;
343 REQN(i, 1);
344 n = n->n_child;
345 if (n->n_type == LPAR) {
346 REQN(i, 3);
347 n++;
348 REQ(n, RHS);
349 compile_rhs(ll, nf, n, pa, pb);
350 n++;
351 REQ(n, RPAR);
352 }
353 else if (n->n_type == NAME || n->n_type == STRING) {
354 *pa = addnfastate(nf);
355 *pb = addnfastate(nf);
356 addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
357 }
358 else
359 REQ(n, NAME);
360}
361
362static void
363dumpstate(ll, nf, istate)
364 labellist *ll;
365 nfa *nf;
366 int istate;
367{
368 nfastate *st;
369 int i;
370 nfaarc *ar;
371
372 printf("%c%2d%c",
373 istate == nf->nf_start ? '*' : ' ',
374 istate,
375 istate == nf->nf_finish ? '.' : ' ');
376 st = &nf->nf_state[istate];
377 ar = st->st_arc;
378 for (i = 0; i < st->st_narcs; i++) {
379 if (i > 0)
380 printf("\n ");
381 printf("-> %2d %s", ar->ar_arrow,
Guido van Rossum86bea461997-04-29 21:03:06 +0000382 PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000383 ar++;
384 }
385 printf("\n");
386}
387
388static void
389dumpnfa(ll, nf)
390 labellist *ll;
391 nfa *nf;
392{
393 int i;
394
395 printf("NFA '%s' has %d states; start %d, finish %d\n",
396 nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
397 for (i = 0; i < nf->nf_nstates; i++)
398 dumpstate(ll, nf, i);
399}
400
401
402/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
403
Guido van Rossum588633d1994-12-30 15:46:02 +0000404static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000405addclosure(ss, nf, istate)
406 bitset ss;
407 nfa *nf;
408 int istate;
409{
410 if (addbit(ss, istate)) {
411 nfastate *st = &nf->nf_state[istate];
412 nfaarc *ar = st->st_arc;
413 int i;
414
415 for (i = st->st_narcs; --i >= 0; ) {
416 if (ar->ar_label == EMPTY)
417 addclosure(ss, nf, ar->ar_arrow);
418 ar++;
419 }
420 }
421}
422
423typedef struct _ss_arc {
424 bitset sa_bitset;
425 int sa_arrow;
426 int sa_label;
427} ss_arc;
428
429typedef struct _ss_state {
430 bitset ss_ss;
431 int ss_narcs;
432 ss_arc *ss_arc;
433 int ss_deleted;
434 int ss_finish;
435 int ss_rename;
436} ss_state;
437
438typedef struct _ss_dfa {
439 int sd_nstates;
440 ss_state *sd_state;
441} ss_dfa;
442
Guido van Rossum71f477c1991-04-03 19:09:02 +0000443/* Forward */
Guido van Rossum86bea461997-04-29 21:03:06 +0000444static void printssdfa Py_PROTO((int xx_nstates, ss_state *xx_state, int nbits,
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000445 labellist *ll, char *msg));
Guido van Rossum86bea461997-04-29 21:03:06 +0000446static void simplify Py_PROTO((int xx_nstates, ss_state *xx_state));
447static void convert Py_PROTO((dfa *d, int xx_nstates, ss_state *xx_state));
Guido van Rossum71f477c1991-04-03 19:09:02 +0000448
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000449static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450makedfa(gr, nf, d)
451 nfagrammar *gr;
452 nfa *nf;
453 dfa *d;
454{
455 int nbits = nf->nf_nstates;
456 bitset ss;
457 int xx_nstates;
458 ss_state *xx_state, *yy;
459 ss_arc *zz;
460 int istate, jstate, iarc, jarc, ibit;
461 nfastate *st;
462 nfaarc *ar;
463
464 ss = newbitset(nbits);
465 addclosure(ss, nf, nf->nf_start);
Guido van Rossum86bea461997-04-29 21:03:06 +0000466 xx_state = PyMem_NEW(ss_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000467 if (xx_state == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000468 Py_FatalError("no mem for xx_state in makedfa");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000469 xx_nstates = 1;
470 yy = &xx_state[0];
471 yy->ss_ss = ss;
472 yy->ss_narcs = 0;
473 yy->ss_arc = NULL;
474 yy->ss_deleted = 0;
475 yy->ss_finish = testbit(ss, nf->nf_finish);
476 if (yy->ss_finish)
477 printf("Error: nonterminal '%s' may produce empty.\n",
478 nf->nf_name);
479
480 /* This algorithm is from a book written before
481 the invention of structured programming... */
482
483 /* For each unmarked state... */
484 for (istate = 0; istate < xx_nstates; ++istate) {
485 yy = &xx_state[istate];
486 ss = yy->ss_ss;
487 /* For all its states... */
488 for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
489 if (!testbit(ss, ibit))
490 continue;
491 st = &nf->nf_state[ibit];
492 /* For all non-empty arcs from this state... */
493 for (iarc = 0; iarc < st->st_narcs; iarc++) {
494 ar = &st->st_arc[iarc];
495 if (ar->ar_label == EMPTY)
496 continue;
497 /* Look up in list of arcs from this state */
498 for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
499 zz = &yy->ss_arc[jarc];
500 if (ar->ar_label == zz->sa_label)
501 goto found;
502 }
503 /* Add new arc for this state */
Guido van Rossum86bea461997-04-29 21:03:06 +0000504 PyMem_RESIZE(yy->ss_arc, ss_arc,
505 yy->ss_narcs + 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000506 if (yy->ss_arc == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000507 Py_FatalError("out of mem");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000508 zz = &yy->ss_arc[yy->ss_narcs++];
509 zz->sa_label = ar->ar_label;
510 zz->sa_bitset = newbitset(nbits);
511 zz->sa_arrow = -1;
512 found: ;
513 /* Add destination */
514 addclosure(zz->sa_bitset, nf, ar->ar_arrow);
515 }
516 }
517 /* Now look up all the arrow states */
518 for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
519 zz = &xx_state[istate].ss_arc[jarc];
520 for (jstate = 0; jstate < xx_nstates; jstate++) {
521 if (samebitset(zz->sa_bitset,
522 xx_state[jstate].ss_ss, nbits)) {
523 zz->sa_arrow = jstate;
524 goto done;
525 }
526 }
Guido van Rossum86bea461997-04-29 21:03:06 +0000527 PyMem_RESIZE(xx_state, ss_state, xx_nstates + 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000528 if (xx_state == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000529 Py_FatalError("out of mem");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000530 zz->sa_arrow = xx_nstates;
531 yy = &xx_state[xx_nstates++];
532 yy->ss_ss = zz->sa_bitset;
533 yy->ss_narcs = 0;
534 yy->ss_arc = NULL;
535 yy->ss_deleted = 0;
536 yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
537 done: ;
538 }
539 }
540
Guido van Rossum86bea461997-04-29 21:03:06 +0000541 if (Py_DebugFlag)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000542 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
543 "before minimizing");
544
545 simplify(xx_nstates, xx_state);
546
Guido van Rossum86bea461997-04-29 21:03:06 +0000547 if (Py_DebugFlag)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000548 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
549 "after minimizing");
550
551 convert(d, xx_nstates, xx_state);
552
553 /* XXX cleanup */
554}
555
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000556static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000557printssdfa(xx_nstates, xx_state, nbits, ll, msg)
558 int xx_nstates;
559 ss_state *xx_state;
560 int nbits;
561 labellist *ll;
562 char *msg;
563{
564 int i, ibit, iarc;
565 ss_state *yy;
566 ss_arc *zz;
567
568 printf("Subset DFA %s\n", msg);
569 for (i = 0; i < xx_nstates; i++) {
570 yy = &xx_state[i];
571 if (yy->ss_deleted)
572 continue;
573 printf(" Subset %d", i);
574 if (yy->ss_finish)
575 printf(" (finish)");
576 printf(" { ");
577 for (ibit = 0; ibit < nbits; ibit++) {
578 if (testbit(yy->ss_ss, ibit))
579 printf("%d ", ibit);
580 }
581 printf("}\n");
582 for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
583 zz = &yy->ss_arc[iarc];
584 printf(" Arc to state %d, label %s\n",
585 zz->sa_arrow,
Guido van Rossum86bea461997-04-29 21:03:06 +0000586 PyGrammar_LabelRepr(
587 &ll->ll_label[zz->sa_label]));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000588 }
589 }
590}
591
592
593/* PART THREE -- SIMPLIFY DFA */
594
595/* Simplify the DFA by repeatedly eliminating states that are
596 equivalent to another oner. This is NOT Algorithm 3.3 from
597 [Aho&Ullman 77]. It does not always finds the minimal DFA,
598 but it does usually make a much smaller one... (For an example
599 of sub-optimal behaviour, try S: x a b+ | y a b+.)
600*/
601
602static int
603samestate(s1, s2)
604 ss_state *s1, *s2;
605{
606 int i;
607
608 if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
609 return 0;
610 for (i = 0; i < s1->ss_narcs; i++) {
611 if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
612 s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
613 return 0;
614 }
615 return 1;
616}
617
618static void
619renamestates(xx_nstates, xx_state, from, to)
620 int xx_nstates;
621 ss_state *xx_state;
622 int from, to;
623{
624 int i, j;
625
Guido van Rossum86bea461997-04-29 21:03:06 +0000626 if (Py_DebugFlag)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000627 printf("Rename state %d to %d.\n", from, to);
628 for (i = 0; i < xx_nstates; i++) {
629 if (xx_state[i].ss_deleted)
630 continue;
631 for (j = 0; j < xx_state[i].ss_narcs; j++) {
632 if (xx_state[i].ss_arc[j].sa_arrow == from)
633 xx_state[i].ss_arc[j].sa_arrow = to;
634 }
635 }
636}
637
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000638static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639simplify(xx_nstates, xx_state)
640 int xx_nstates;
641 ss_state *xx_state;
642{
643 int changes;
Guido van Rossum588633d1994-12-30 15:46:02 +0000644 int i, j;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000645
646 do {
647 changes = 0;
648 for (i = 1; i < xx_nstates; i++) {
649 if (xx_state[i].ss_deleted)
650 continue;
651 for (j = 0; j < i; j++) {
652 if (xx_state[j].ss_deleted)
653 continue;
654 if (samestate(&xx_state[i], &xx_state[j])) {
655 xx_state[i].ss_deleted++;
Guido van Rossum86bea461997-04-29 21:03:06 +0000656 renamestates(xx_nstates, xx_state,
657 i, j);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000658 changes++;
659 break;
660 }
661 }
662 }
663 } while (changes);
664}
665
666
667/* PART FOUR -- GENERATE PARSING TABLES */
668
669/* Convert the DFA into a grammar that can be used by our parser */
670
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000671static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000672convert(d, xx_nstates, xx_state)
673 dfa *d;
674 int xx_nstates;
675 ss_state *xx_state;
676{
677 int i, j;
678 ss_state *yy;
679 ss_arc *zz;
680
681 for (i = 0; i < xx_nstates; i++) {
682 yy = &xx_state[i];
683 if (yy->ss_deleted)
684 continue;
685 yy->ss_rename = addstate(d);
686 }
687
688 for (i = 0; i < xx_nstates; i++) {
689 yy = &xx_state[i];
690 if (yy->ss_deleted)
691 continue;
692 for (j = 0; j < yy->ss_narcs; j++) {
693 zz = &yy->ss_arc[j];
694 addarc(d, yy->ss_rename,
695 xx_state[zz->sa_arrow].ss_rename,
696 zz->sa_label);
697 }
698 if (yy->ss_finish)
699 addarc(d, yy->ss_rename, yy->ss_rename, 0);
700 }
701
702 d->d_initial = 0;
703}
704
705
706/* PART FIVE -- GLUE IT ALL TOGETHER */
707
708static grammar *
709maketables(gr)
710 nfagrammar *gr;
711{
712 int i;
713 nfa *nf;
714 dfa *d;
715 grammar *g;
716
717 if (gr->gr_nnfas == 0)
718 return NULL;
719 g = newgrammar(gr->gr_nfa[0]->nf_type);
720 /* XXX first rule must be start rule */
721 g->g_ll = gr->gr_ll;
722
723 for (i = 0; i < gr->gr_nnfas; i++) {
724 nf = gr->gr_nfa[i];
Guido van Rossum86bea461997-04-29 21:03:06 +0000725 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726 printf("Dump of NFA for '%s' ...\n", nf->nf_name);
727 dumpnfa(&gr->gr_ll, nf);
728 }
729 printf("Making DFA for '%s' ...\n", nf->nf_name);
730 d = adddfa(g, nf->nf_type, nf->nf_name);
731 makedfa(gr, gr->gr_nfa[i], d);
732 }
733
734 return g;
735}
736
737grammar *
738pgen(n)
739 node *n;
740{
741 nfagrammar *gr;
742 grammar *g;
743
744 gr = metacompile(n);
745 g = maketables(gr);
746 translatelabels(g);
747 addfirstsets(g);
748 return g;
749}
750
751
752/*
753
754Description
755-----------
756
757Input is a grammar in extended BNF (using * for repetition, + for
758at-least-once repetition, [] for optional parts, | for alternatives and
759() for grouping). This has already been parsed and turned into a parse
760tree.
761
762Each rule is considered as a regular expression in its own right.
763It is turned into a Non-deterministic Finite Automaton (NFA), which
764is then turned into a Deterministic Finite Automaton (DFA), which is then
765optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3,
766or similar compiler books (this technique is more often used for lexical
767analyzers).
768
769The DFA's are used by the parser as parsing tables in a special way
770that's probably unique. Before they are usable, the FIRST sets of all
771non-terminals are computed.
772
773Reference
774---------
775
776[Aho&Ullman 77]
777 Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
778 (first edition)
779
780*/