blob: 0bc38dc343fe78561201201bc4c922d22f06e1af [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* Parser generator */
Guido van Rossum3f5da241990-12-20 15:06:42 +000012/* XXX This file is not yet fully PROTOized */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000013
14/* For a description, see the comments at end of this file */
15
Guido van Rossum3f5da241990-12-20 15:06:42 +000016#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000017#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000018#include "token.h"
19#include "node.h"
20#include "grammar.h"
21#include "metagrammar.h"
22#include "pgen.h"
23
Guido van Rossum86bea461997-04-29 21:03:06 +000024extern int Py_DebugFlag;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025
26
27/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
28
29typedef struct _nfaarc {
30 int ar_label;
31 int ar_arrow;
32} nfaarc;
33
34typedef struct _nfastate {
35 int st_narcs;
36 nfaarc *st_arc;
37} nfastate;
38
39typedef struct _nfa {
40 int nf_type;
41 char *nf_name;
42 int nf_nstates;
43 nfastate *nf_state;
44 int nf_start, nf_finish;
45} nfa;
46
Guido van Rossum71f477c1991-04-03 19:09:02 +000047/* Forward */
Tim Petersdbd9ba62000-07-09 03:09:57 +000048static void compile_rhs(labellist *ll,
49 nfa *nf, node *n, int *pa, int *pb);
50static void compile_alt(labellist *ll,
51 nfa *nf, node *n, int *pa, int *pb);
52static void compile_item(labellist *ll,
53 nfa *nf, node *n, int *pa, int *pb);
54static void compile_atom(labellist *ll,
55 nfa *nf, node *n, int *pa, int *pb);
Guido van Rossum71f477c1991-04-03 19:09:02 +000056
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057static int
Thomas Wouters23c9e002000-07-22 19:20:54 +000058addnfastate(nfa *nf)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000059{
60 nfastate *st;
61
Guido van Rossum86bea461997-04-29 21:03:06 +000062 PyMem_RESIZE(nf->nf_state, nfastate, nf->nf_nstates + 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063 if (nf->nf_state == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +000064 Py_FatalError("out of mem");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065 st = &nf->nf_state[nf->nf_nstates++];
66 st->st_narcs = 0;
67 st->st_arc = NULL;
68 return st - nf->nf_state;
69}
70
71static void
Thomas Wouters23c9e002000-07-22 19:20:54 +000072addnfaarc(nfa *nf, int from, int to, int lbl)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073{
74 nfastate *st;
75 nfaarc *ar;
76
77 st = &nf->nf_state[from];
Guido van Rossum86bea461997-04-29 21:03:06 +000078 PyMem_RESIZE(st->st_arc, nfaarc, st->st_narcs + 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000079 if (st->st_arc == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +000080 Py_FatalError("out of mem");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000081 ar = &st->st_arc[st->st_narcs++];
82 ar->ar_label = lbl;
83 ar->ar_arrow = to;
84}
85
86static nfa *
Thomas Wouters23c9e002000-07-22 19:20:54 +000087newnfa(char *name)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088{
89 nfa *nf;
Guido van Rossumbb3649e1998-04-10 22:09:39 +000090 static int type = NT_OFFSET; /* All types will be disjunct */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091
Guido van Rossum86bea461997-04-29 21:03:06 +000092 nf = PyMem_NEW(nfa, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000093 if (nf == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +000094 Py_FatalError("no mem for new nfa");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095 nf->nf_type = type++;
96 nf->nf_name = name; /* XXX strdup(name) ??? */
97 nf->nf_nstates = 0;
98 nf->nf_state = NULL;
99 nf->nf_start = nf->nf_finish = -1;
100 return nf;
101}
102
103typedef struct _nfagrammar {
104 int gr_nnfas;
105 nfa **gr_nfa;
106 labellist gr_ll;
107} nfagrammar;
108
Guido van Rossum71f477c1991-04-03 19:09:02 +0000109/* Forward */
Tim Petersdbd9ba62000-07-09 03:09:57 +0000110static void compile_rule(nfagrammar *gr, node *n);
Guido van Rossum71f477c1991-04-03 19:09:02 +0000111
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112static nfagrammar *
Thomas Wouters23c9e002000-07-22 19:20:54 +0000113newnfagrammar(void)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000114{
115 nfagrammar *gr;
116
Guido van Rossum86bea461997-04-29 21:03:06 +0000117 gr = PyMem_NEW(nfagrammar, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000118 if (gr == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000119 Py_FatalError("no mem for new nfa grammar");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120 gr->gr_nnfas = 0;
121 gr->gr_nfa = NULL;
122 gr->gr_ll.ll_nlabels = 0;
123 gr->gr_ll.ll_label = NULL;
124 addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
125 return gr;
126}
127
128static nfa *
Thomas Wouters23c9e002000-07-22 19:20:54 +0000129addnfa(nfagrammar *gr, char *name)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000130{
131 nfa *nf;
132
133 nf = newnfa(name);
Guido van Rossum86bea461997-04-29 21:03:06 +0000134 PyMem_RESIZE(gr->gr_nfa, nfa *, gr->gr_nnfas + 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000135 if (gr->gr_nfa == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000136 Py_FatalError("out of mem");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000137 gr->gr_nfa[gr->gr_nnfas++] = nf;
138 addlabel(&gr->gr_ll, NAME, nf->nf_name);
139 return nf;
140}
141
Guido van Rossum408027e1996-12-30 16:17:54 +0000142#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000143
144static char REQNFMT[] = "metacompile: less than %d children\n";
145
146#define REQN(i, count) \
147 if (i < count) { \
148 fprintf(stderr, REQNFMT, count); \
Guido van Rossum86bea461997-04-29 21:03:06 +0000149 Py_FatalError("REQN"); \
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150 } else
151
152#else
153#define REQN(i, count) /* empty */
154#endif
155
156static nfagrammar *
Thomas Wouters23c9e002000-07-22 19:20:54 +0000157metacompile(node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000158{
159 nfagrammar *gr;
160 int i;
161
162 printf("Compiling (meta-) parse tree into NFA grammar\n");
163 gr = newnfagrammar();
164 REQ(n, MSTART);
165 i = n->n_nchildren - 1; /* Last child is ENDMARKER */
166 n = n->n_child;
167 for (; --i >= 0; n++) {
168 if (n->n_type != NEWLINE)
169 compile_rule(gr, n);
170 }
171 return gr;
172}
173
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000174static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000175compile_rule(nfagrammar *gr, node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000176{
177 nfa *nf;
178
179 REQ(n, RULE);
180 REQN(n->n_nchildren, 4);
181 n = n->n_child;
182 REQ(n, NAME);
183 nf = addnfa(gr, n->n_str);
184 n++;
185 REQ(n, COLON);
186 n++;
187 REQ(n, RHS);
188 compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
189 n++;
190 REQ(n, NEWLINE);
191}
192
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000193static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000194compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195{
196 int i;
197 int a, b;
198
199 REQ(n, RHS);
200 i = n->n_nchildren;
201 REQN(i, 1);
202 n = n->n_child;
203 REQ(n, ALT);
204 compile_alt(ll, nf, n, pa, pb);
205 if (--i <= 0)
206 return;
207 n++;
208 a = *pa;
209 b = *pb;
210 *pa = addnfastate(nf);
211 *pb = addnfastate(nf);
212 addnfaarc(nf, *pa, a, EMPTY);
213 addnfaarc(nf, b, *pb, EMPTY);
214 for (; --i >= 0; n++) {
215 REQ(n, VBAR);
216 REQN(i, 1);
217 --i;
218 n++;
219 REQ(n, ALT);
220 compile_alt(ll, nf, n, &a, &b);
221 addnfaarc(nf, *pa, a, EMPTY);
222 addnfaarc(nf, b, *pb, EMPTY);
223 }
224}
225
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000226static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000227compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000228{
229 int i;
230 int a, b;
231
232 REQ(n, ALT);
233 i = n->n_nchildren;
234 REQN(i, 1);
235 n = n->n_child;
236 REQ(n, ITEM);
237 compile_item(ll, nf, n, pa, pb);
238 --i;
239 n++;
240 for (; --i >= 0; n++) {
241 if (n->n_type == COMMA) { /* XXX Temporary */
242 REQN(i, 1);
243 --i;
244 n++;
245 }
246 REQ(n, ITEM);
247 compile_item(ll, nf, n, &a, &b);
248 addnfaarc(nf, *pb, a, EMPTY);
249 *pb = b;
250 }
251}
252
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000253static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000254compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000255{
256 int i;
257 int a, b;
258
259 REQ(n, ITEM);
260 i = n->n_nchildren;
261 REQN(i, 1);
262 n = n->n_child;
263 if (n->n_type == LSQB) {
264 REQN(i, 3);
265 n++;
266 REQ(n, RHS);
267 *pa = addnfastate(nf);
268 *pb = addnfastate(nf);
269 addnfaarc(nf, *pa, *pb, EMPTY);
270 compile_rhs(ll, nf, n, &a, &b);
271 addnfaarc(nf, *pa, a, EMPTY);
272 addnfaarc(nf, b, *pb, EMPTY);
273 REQN(i, 1);
274 n++;
275 REQ(n, RSQB);
276 }
277 else {
278 compile_atom(ll, nf, n, pa, pb);
279 if (--i <= 0)
280 return;
281 n++;
282 addnfaarc(nf, *pb, *pa, EMPTY);
283 if (n->n_type == STAR)
284 *pb = *pa;
285 else
286 REQ(n, PLUS);
287 }
288}
289
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000290static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000291compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000292{
293 int i;
294
295 REQ(n, ATOM);
296 i = n->n_nchildren;
297 REQN(i, 1);
298 n = n->n_child;
299 if (n->n_type == LPAR) {
300 REQN(i, 3);
301 n++;
302 REQ(n, RHS);
303 compile_rhs(ll, nf, n, pa, pb);
304 n++;
305 REQ(n, RPAR);
306 }
307 else if (n->n_type == NAME || n->n_type == STRING) {
308 *pa = addnfastate(nf);
309 *pb = addnfastate(nf);
310 addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
311 }
312 else
313 REQ(n, NAME);
314}
315
316static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000317dumpstate(labellist *ll, nfa *nf, int istate)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318{
319 nfastate *st;
320 int i;
321 nfaarc *ar;
322
323 printf("%c%2d%c",
324 istate == nf->nf_start ? '*' : ' ',
325 istate,
326 istate == nf->nf_finish ? '.' : ' ');
327 st = &nf->nf_state[istate];
328 ar = st->st_arc;
329 for (i = 0; i < st->st_narcs; i++) {
330 if (i > 0)
331 printf("\n ");
332 printf("-> %2d %s", ar->ar_arrow,
Guido van Rossum86bea461997-04-29 21:03:06 +0000333 PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 ar++;
335 }
336 printf("\n");
337}
338
339static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000340dumpnfa(labellist *ll, nfa *nf)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000341{
342 int i;
343
344 printf("NFA '%s' has %d states; start %d, finish %d\n",
345 nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
346 for (i = 0; i < nf->nf_nstates; i++)
347 dumpstate(ll, nf, i);
348}
349
350
351/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
352
Guido van Rossum588633d1994-12-30 15:46:02 +0000353static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000354addclosure(bitset ss, nfa *nf, int istate)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000355{
356 if (addbit(ss, istate)) {
357 nfastate *st = &nf->nf_state[istate];
358 nfaarc *ar = st->st_arc;
359 int i;
360
361 for (i = st->st_narcs; --i >= 0; ) {
362 if (ar->ar_label == EMPTY)
363 addclosure(ss, nf, ar->ar_arrow);
364 ar++;
365 }
366 }
367}
368
369typedef struct _ss_arc {
370 bitset sa_bitset;
371 int sa_arrow;
372 int sa_label;
373} ss_arc;
374
375typedef struct _ss_state {
376 bitset ss_ss;
377 int ss_narcs;
378 ss_arc *ss_arc;
379 int ss_deleted;
380 int ss_finish;
381 int ss_rename;
382} ss_state;
383
384typedef struct _ss_dfa {
385 int sd_nstates;
386 ss_state *sd_state;
387} ss_dfa;
388
Guido van Rossum71f477c1991-04-03 19:09:02 +0000389/* Forward */
Tim Petersdbd9ba62000-07-09 03:09:57 +0000390static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
391 labellist *ll, char *msg);
392static void simplify(int xx_nstates, ss_state *xx_state);
393static void convert(dfa *d, int xx_nstates, ss_state *xx_state);
Guido van Rossum71f477c1991-04-03 19:09:02 +0000394
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000395static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000396makedfa(nfagrammar *gr, nfa *nf, dfa *d)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000397{
398 int nbits = nf->nf_nstates;
399 bitset ss;
400 int xx_nstates;
401 ss_state *xx_state, *yy;
402 ss_arc *zz;
403 int istate, jstate, iarc, jarc, ibit;
404 nfastate *st;
405 nfaarc *ar;
406
407 ss = newbitset(nbits);
408 addclosure(ss, nf, nf->nf_start);
Guido van Rossum86bea461997-04-29 21:03:06 +0000409 xx_state = PyMem_NEW(ss_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 if (xx_state == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000411 Py_FatalError("no mem for xx_state in makedfa");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412 xx_nstates = 1;
413 yy = &xx_state[0];
414 yy->ss_ss = ss;
415 yy->ss_narcs = 0;
416 yy->ss_arc = NULL;
417 yy->ss_deleted = 0;
418 yy->ss_finish = testbit(ss, nf->nf_finish);
419 if (yy->ss_finish)
420 printf("Error: nonterminal '%s' may produce empty.\n",
421 nf->nf_name);
422
423 /* This algorithm is from a book written before
424 the invention of structured programming... */
425
426 /* For each unmarked state... */
427 for (istate = 0; istate < xx_nstates; ++istate) {
428 yy = &xx_state[istate];
429 ss = yy->ss_ss;
430 /* For all its states... */
431 for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
432 if (!testbit(ss, ibit))
433 continue;
434 st = &nf->nf_state[ibit];
435 /* For all non-empty arcs from this state... */
436 for (iarc = 0; iarc < st->st_narcs; iarc++) {
437 ar = &st->st_arc[iarc];
438 if (ar->ar_label == EMPTY)
439 continue;
440 /* Look up in list of arcs from this state */
441 for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
442 zz = &yy->ss_arc[jarc];
443 if (ar->ar_label == zz->sa_label)
444 goto found;
445 }
446 /* Add new arc for this state */
Guido van Rossum86bea461997-04-29 21:03:06 +0000447 PyMem_RESIZE(yy->ss_arc, ss_arc,
448 yy->ss_narcs + 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000449 if (yy->ss_arc == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000450 Py_FatalError("out of mem");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000451 zz = &yy->ss_arc[yy->ss_narcs++];
452 zz->sa_label = ar->ar_label;
453 zz->sa_bitset = newbitset(nbits);
454 zz->sa_arrow = -1;
455 found: ;
456 /* Add destination */
457 addclosure(zz->sa_bitset, nf, ar->ar_arrow);
458 }
459 }
460 /* Now look up all the arrow states */
461 for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
462 zz = &xx_state[istate].ss_arc[jarc];
463 for (jstate = 0; jstate < xx_nstates; jstate++) {
464 if (samebitset(zz->sa_bitset,
465 xx_state[jstate].ss_ss, nbits)) {
466 zz->sa_arrow = jstate;
467 goto done;
468 }
469 }
Guido van Rossum86bea461997-04-29 21:03:06 +0000470 PyMem_RESIZE(xx_state, ss_state, xx_nstates + 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000471 if (xx_state == NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000472 Py_FatalError("out of mem");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000473 zz->sa_arrow = xx_nstates;
474 yy = &xx_state[xx_nstates++];
475 yy->ss_ss = zz->sa_bitset;
476 yy->ss_narcs = 0;
477 yy->ss_arc = NULL;
478 yy->ss_deleted = 0;
479 yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
480 done: ;
481 }
482 }
483
Guido van Rossum86bea461997-04-29 21:03:06 +0000484 if (Py_DebugFlag)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000485 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
486 "before minimizing");
487
488 simplify(xx_nstates, xx_state);
489
Guido van Rossum86bea461997-04-29 21:03:06 +0000490 if (Py_DebugFlag)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000491 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
492 "after minimizing");
493
494 convert(d, xx_nstates, xx_state);
495
496 /* XXX cleanup */
497}
498
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000499static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000500printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
501 labellist *ll, char *msg)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000502{
503 int i, ibit, iarc;
504 ss_state *yy;
505 ss_arc *zz;
506
507 printf("Subset DFA %s\n", msg);
508 for (i = 0; i < xx_nstates; i++) {
509 yy = &xx_state[i];
510 if (yy->ss_deleted)
511 continue;
512 printf(" Subset %d", i);
513 if (yy->ss_finish)
514 printf(" (finish)");
515 printf(" { ");
516 for (ibit = 0; ibit < nbits; ibit++) {
517 if (testbit(yy->ss_ss, ibit))
518 printf("%d ", ibit);
519 }
520 printf("}\n");
521 for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
522 zz = &yy->ss_arc[iarc];
523 printf(" Arc to state %d, label %s\n",
524 zz->sa_arrow,
Guido van Rossum86bea461997-04-29 21:03:06 +0000525 PyGrammar_LabelRepr(
526 &ll->ll_label[zz->sa_label]));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000527 }
528 }
529}
530
531
532/* PART THREE -- SIMPLIFY DFA */
533
534/* Simplify the DFA by repeatedly eliminating states that are
535 equivalent to another oner. This is NOT Algorithm 3.3 from
536 [Aho&Ullman 77]. It does not always finds the minimal DFA,
537 but it does usually make a much smaller one... (For an example
Thomas Wouters7e474022000-07-16 12:04:32 +0000538 of sub-optimal behavior, try S: x a b+ | y a b+.)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000539*/
540
541static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000542samestate(ss_state *s1, ss_state *s2)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000543{
544 int i;
545
546 if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
547 return 0;
548 for (i = 0; i < s1->ss_narcs; i++) {
549 if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
550 s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
551 return 0;
552 }
553 return 1;
554}
555
556static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000557renamestates(int xx_nstates, ss_state *xx_state, int from, int to)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000558{
559 int i, j;
560
Guido van Rossum86bea461997-04-29 21:03:06 +0000561 if (Py_DebugFlag)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000562 printf("Rename state %d to %d.\n", from, to);
563 for (i = 0; i < xx_nstates; i++) {
564 if (xx_state[i].ss_deleted)
565 continue;
566 for (j = 0; j < xx_state[i].ss_narcs; j++) {
567 if (xx_state[i].ss_arc[j].sa_arrow == from)
568 xx_state[i].ss_arc[j].sa_arrow = to;
569 }
570 }
571}
572
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000573static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000574simplify(int xx_nstates, ss_state *xx_state)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000575{
576 int changes;
Guido van Rossum588633d1994-12-30 15:46:02 +0000577 int i, j;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000578
579 do {
580 changes = 0;
581 for (i = 1; i < xx_nstates; i++) {
582 if (xx_state[i].ss_deleted)
583 continue;
584 for (j = 0; j < i; j++) {
585 if (xx_state[j].ss_deleted)
586 continue;
587 if (samestate(&xx_state[i], &xx_state[j])) {
588 xx_state[i].ss_deleted++;
Guido van Rossum86bea461997-04-29 21:03:06 +0000589 renamestates(xx_nstates, xx_state,
590 i, j);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000591 changes++;
592 break;
593 }
594 }
595 }
596 } while (changes);
597}
598
599
600/* PART FOUR -- GENERATE PARSING TABLES */
601
602/* Convert the DFA into a grammar that can be used by our parser */
603
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000604static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000605convert(dfa *d, int xx_nstates, ss_state *xx_state)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000606{
607 int i, j;
608 ss_state *yy;
609 ss_arc *zz;
610
611 for (i = 0; i < xx_nstates; i++) {
612 yy = &xx_state[i];
613 if (yy->ss_deleted)
614 continue;
615 yy->ss_rename = addstate(d);
616 }
617
618 for (i = 0; i < xx_nstates; i++) {
619 yy = &xx_state[i];
620 if (yy->ss_deleted)
621 continue;
622 for (j = 0; j < yy->ss_narcs; j++) {
623 zz = &yy->ss_arc[j];
624 addarc(d, yy->ss_rename,
625 xx_state[zz->sa_arrow].ss_rename,
626 zz->sa_label);
627 }
628 if (yy->ss_finish)
629 addarc(d, yy->ss_rename, yy->ss_rename, 0);
630 }
631
632 d->d_initial = 0;
633}
634
635
636/* PART FIVE -- GLUE IT ALL TOGETHER */
637
638static grammar *
Thomas Wouters23c9e002000-07-22 19:20:54 +0000639maketables(nfagrammar *gr)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000640{
641 int i;
642 nfa *nf;
643 dfa *d;
644 grammar *g;
645
646 if (gr->gr_nnfas == 0)
647 return NULL;
648 g = newgrammar(gr->gr_nfa[0]->nf_type);
649 /* XXX first rule must be start rule */
650 g->g_ll = gr->gr_ll;
651
652 for (i = 0; i < gr->gr_nnfas; i++) {
653 nf = gr->gr_nfa[i];
Guido van Rossum86bea461997-04-29 21:03:06 +0000654 if (Py_DebugFlag) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000655 printf("Dump of NFA for '%s' ...\n", nf->nf_name);
656 dumpnfa(&gr->gr_ll, nf);
657 }
658 printf("Making DFA for '%s' ...\n", nf->nf_name);
659 d = adddfa(g, nf->nf_type, nf->nf_name);
660 makedfa(gr, gr->gr_nfa[i], d);
661 }
662
663 return g;
664}
665
666grammar *
Thomas Wouters23c9e002000-07-22 19:20:54 +0000667pgen(node *n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000668{
669 nfagrammar *gr;
670 grammar *g;
671
672 gr = metacompile(n);
673 g = maketables(gr);
674 translatelabels(g);
675 addfirstsets(g);
676 return g;
677}
678
679
680/*
681
682Description
683-----------
684
685Input is a grammar in extended BNF (using * for repetition, + for
686at-least-once repetition, [] for optional parts, | for alternatives and
687() for grouping). This has already been parsed and turned into a parse
688tree.
689
690Each rule is considered as a regular expression in its own right.
691It is turned into a Non-deterministic Finite Automaton (NFA), which
692is then turned into a Deterministic Finite Automaton (DFA), which is then
693optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3,
694or similar compiler books (this technique is more often used for lexical
695analyzers).
696
697The DFA's are used by the parser as parsing tables in a special way
698that's probably unique. Before they are usable, the FIRST sets of all
699non-terminals are computed.
700
701Reference
702---------
703
704[Aho&Ullman 77]
705 Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
706 (first edition)
707
708*/