blob: 96686309397f63d301d2264c44a6082604a06b64 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumb9f8d6e1995-01-04 19:08:09 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* Parser generator */
Guido van Rossum3f5da241990-12-20 15:06:42 +000033/* XXX This file is not yet fully PROTOized */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000034
35/* For a description, see the comments at end of this file */
36
Guido van Rossum3f5da241990-12-20 15:06:42 +000037#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039#include "token.h"
40#include "node.h"
41#include "grammar.h"
42#include "metagrammar.h"
43#include "pgen.h"
44
45extern int debugging;
46
47
48/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
49
50typedef struct _nfaarc {
51 int ar_label;
52 int ar_arrow;
53} nfaarc;
54
55typedef struct _nfastate {
56 int st_narcs;
57 nfaarc *st_arc;
58} nfastate;
59
60typedef struct _nfa {
61 int nf_type;
62 char *nf_name;
63 int nf_nstates;
64 nfastate *nf_state;
65 int nf_start, nf_finish;
66} nfa;
67
Guido van Rossum71f477c1991-04-03 19:09:02 +000068/* Forward */
Guido van Rossumfd8a3931996-12-02 18:27:33 +000069static void compile_rhs PROTO((labellist *ll,
70 nfa *nf, node *n, int *pa, int *pb));
71static void compile_alt PROTO((labellist *ll,
72 nfa *nf, node *n, int *pa, int *pb));
73static void compile_item PROTO((labellist *ll,
74 nfa *nf, node *n, int *pa, int *pb));
75static void compile_atom PROTO((labellist *ll,
76 nfa *nf, node *n, int *pa, int *pb));
Guido van Rossum71f477c1991-04-03 19:09:02 +000077
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000078static int
79addnfastate(nf)
80 nfa *nf;
81{
82 nfastate *st;
83
84 RESIZE(nf->nf_state, nfastate, nf->nf_nstates + 1);
85 if (nf->nf_state == NULL)
86 fatal("out of mem");
87 st = &nf->nf_state[nf->nf_nstates++];
88 st->st_narcs = 0;
89 st->st_arc = NULL;
90 return st - nf->nf_state;
91}
92
93static void
94addnfaarc(nf, from, to, lbl)
95 nfa *nf;
96 int from, to, lbl;
97{
98 nfastate *st;
99 nfaarc *ar;
100
101 st = &nf->nf_state[from];
102 RESIZE(st->st_arc, nfaarc, st->st_narcs + 1);
103 if (st->st_arc == NULL)
104 fatal("out of mem");
105 ar = &st->st_arc[st->st_narcs++];
106 ar->ar_label = lbl;
107 ar->ar_arrow = to;
108}
109
110static nfa *
111newnfa(name)
112 char *name;
113{
114 nfa *nf;
115 static type = NT_OFFSET; /* All types will be disjunct */
116
117 nf = NEW(nfa, 1);
118 if (nf == NULL)
119 fatal("no mem for new nfa");
120 nf->nf_type = type++;
121 nf->nf_name = name; /* XXX strdup(name) ??? */
122 nf->nf_nstates = 0;
123 nf->nf_state = NULL;
124 nf->nf_start = nf->nf_finish = -1;
125 return nf;
126}
127
128typedef struct _nfagrammar {
129 int gr_nnfas;
130 nfa **gr_nfa;
131 labellist gr_ll;
132} nfagrammar;
133
Guido van Rossum71f477c1991-04-03 19:09:02 +0000134/* Forward */
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000135static void compile_rule PROTO((nfagrammar *gr, node *n));
Guido van Rossum71f477c1991-04-03 19:09:02 +0000136
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000137static nfagrammar *
138newnfagrammar()
139{
140 nfagrammar *gr;
141
142 gr = NEW(nfagrammar, 1);
143 if (gr == NULL)
144 fatal("no mem for new nfa grammar");
145 gr->gr_nnfas = 0;
146 gr->gr_nfa = NULL;
147 gr->gr_ll.ll_nlabels = 0;
148 gr->gr_ll.ll_label = NULL;
149 addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
150 return gr;
151}
152
153static nfa *
154addnfa(gr, name)
155 nfagrammar *gr;
156 char *name;
157{
158 nfa *nf;
159
160 nf = newnfa(name);
161 RESIZE(gr->gr_nfa, nfa *, gr->gr_nnfas + 1);
162 if (gr->gr_nfa == NULL)
163 fatal("out of mem");
164 gr->gr_nfa[gr->gr_nnfas++] = nf;
165 addlabel(&gr->gr_ll, NAME, nf->nf_name);
166 return nf;
167}
168
Guido van Rossum408027e1996-12-30 16:17:54 +0000169#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000170
171static char REQNFMT[] = "metacompile: less than %d children\n";
172
173#define REQN(i, count) \
174 if (i < count) { \
175 fprintf(stderr, REQNFMT, count); \
Guido van Rossum588633d1994-12-30 15:46:02 +0000176 fatal("REQN"); \
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000177 } else
178
179#else
180#define REQN(i, count) /* empty */
181#endif
182
183static nfagrammar *
184metacompile(n)
185 node *n;
186{
187 nfagrammar *gr;
188 int i;
189
190 printf("Compiling (meta-) parse tree into NFA grammar\n");
191 gr = newnfagrammar();
192 REQ(n, MSTART);
193 i = n->n_nchildren - 1; /* Last child is ENDMARKER */
194 n = n->n_child;
195 for (; --i >= 0; n++) {
196 if (n->n_type != NEWLINE)
197 compile_rule(gr, n);
198 }
199 return gr;
200}
201
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000202static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000203compile_rule(gr, n)
204 nfagrammar *gr;
205 node *n;
206{
207 nfa *nf;
208
209 REQ(n, RULE);
210 REQN(n->n_nchildren, 4);
211 n = n->n_child;
212 REQ(n, NAME);
213 nf = addnfa(gr, n->n_str);
214 n++;
215 REQ(n, COLON);
216 n++;
217 REQ(n, RHS);
218 compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
219 n++;
220 REQ(n, NEWLINE);
221}
222
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000223static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000224compile_rhs(ll, nf, n, pa, pb)
225 labellist *ll;
226 nfa *nf;
227 node *n;
228 int *pa, *pb;
229{
230 int i;
231 int a, b;
232
233 REQ(n, RHS);
234 i = n->n_nchildren;
235 REQN(i, 1);
236 n = n->n_child;
237 REQ(n, ALT);
238 compile_alt(ll, nf, n, pa, pb);
239 if (--i <= 0)
240 return;
241 n++;
242 a = *pa;
243 b = *pb;
244 *pa = addnfastate(nf);
245 *pb = addnfastate(nf);
246 addnfaarc(nf, *pa, a, EMPTY);
247 addnfaarc(nf, b, *pb, EMPTY);
248 for (; --i >= 0; n++) {
249 REQ(n, VBAR);
250 REQN(i, 1);
251 --i;
252 n++;
253 REQ(n, ALT);
254 compile_alt(ll, nf, n, &a, &b);
255 addnfaarc(nf, *pa, a, EMPTY);
256 addnfaarc(nf, b, *pb, EMPTY);
257 }
258}
259
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000260static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000261compile_alt(ll, nf, n, pa, pb)
262 labellist *ll;
263 nfa *nf;
264 node *n;
265 int *pa, *pb;
266{
267 int i;
268 int a, b;
269
270 REQ(n, ALT);
271 i = n->n_nchildren;
272 REQN(i, 1);
273 n = n->n_child;
274 REQ(n, ITEM);
275 compile_item(ll, nf, n, pa, pb);
276 --i;
277 n++;
278 for (; --i >= 0; n++) {
279 if (n->n_type == COMMA) { /* XXX Temporary */
280 REQN(i, 1);
281 --i;
282 n++;
283 }
284 REQ(n, ITEM);
285 compile_item(ll, nf, n, &a, &b);
286 addnfaarc(nf, *pb, a, EMPTY);
287 *pb = b;
288 }
289}
290
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000291static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000292compile_item(ll, nf, n, pa, pb)
293 labellist *ll;
294 nfa *nf;
295 node *n;
296 int *pa, *pb;
297{
298 int i;
299 int a, b;
300
301 REQ(n, ITEM);
302 i = n->n_nchildren;
303 REQN(i, 1);
304 n = n->n_child;
305 if (n->n_type == LSQB) {
306 REQN(i, 3);
307 n++;
308 REQ(n, RHS);
309 *pa = addnfastate(nf);
310 *pb = addnfastate(nf);
311 addnfaarc(nf, *pa, *pb, EMPTY);
312 compile_rhs(ll, nf, n, &a, &b);
313 addnfaarc(nf, *pa, a, EMPTY);
314 addnfaarc(nf, b, *pb, EMPTY);
315 REQN(i, 1);
316 n++;
317 REQ(n, RSQB);
318 }
319 else {
320 compile_atom(ll, nf, n, pa, pb);
321 if (--i <= 0)
322 return;
323 n++;
324 addnfaarc(nf, *pb, *pa, EMPTY);
325 if (n->n_type == STAR)
326 *pb = *pa;
327 else
328 REQ(n, PLUS);
329 }
330}
331
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000332static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000333compile_atom(ll, nf, n, pa, pb)
334 labellist *ll;
335 nfa *nf;
336 node *n;
337 int *pa, *pb;
338{
339 int i;
340
341 REQ(n, ATOM);
342 i = n->n_nchildren;
343 REQN(i, 1);
344 n = n->n_child;
345 if (n->n_type == LPAR) {
346 REQN(i, 3);
347 n++;
348 REQ(n, RHS);
349 compile_rhs(ll, nf, n, pa, pb);
350 n++;
351 REQ(n, RPAR);
352 }
353 else if (n->n_type == NAME || n->n_type == STRING) {
354 *pa = addnfastate(nf);
355 *pb = addnfastate(nf);
356 addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
357 }
358 else
359 REQ(n, NAME);
360}
361
362static void
363dumpstate(ll, nf, istate)
364 labellist *ll;
365 nfa *nf;
366 int istate;
367{
368 nfastate *st;
369 int i;
370 nfaarc *ar;
371
372 printf("%c%2d%c",
373 istate == nf->nf_start ? '*' : ' ',
374 istate,
375 istate == nf->nf_finish ? '.' : ' ');
376 st = &nf->nf_state[istate];
377 ar = st->st_arc;
378 for (i = 0; i < st->st_narcs; i++) {
379 if (i > 0)
380 printf("\n ");
381 printf("-> %2d %s", ar->ar_arrow,
382 labelrepr(&ll->ll_label[ar->ar_label]));
383 ar++;
384 }
385 printf("\n");
386}
387
388static void
389dumpnfa(ll, nf)
390 labellist *ll;
391 nfa *nf;
392{
393 int i;
394
395 printf("NFA '%s' has %d states; start %d, finish %d\n",
396 nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
397 for (i = 0; i < nf->nf_nstates; i++)
398 dumpstate(ll, nf, i);
399}
400
401
402/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
403
Guido van Rossum588633d1994-12-30 15:46:02 +0000404static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000405addclosure(ss, nf, istate)
406 bitset ss;
407 nfa *nf;
408 int istate;
409{
410 if (addbit(ss, istate)) {
411 nfastate *st = &nf->nf_state[istate];
412 nfaarc *ar = st->st_arc;
413 int i;
414
415 for (i = st->st_narcs; --i >= 0; ) {
416 if (ar->ar_label == EMPTY)
417 addclosure(ss, nf, ar->ar_arrow);
418 ar++;
419 }
420 }
421}
422
423typedef struct _ss_arc {
424 bitset sa_bitset;
425 int sa_arrow;
426 int sa_label;
427} ss_arc;
428
429typedef struct _ss_state {
430 bitset ss_ss;
431 int ss_narcs;
432 ss_arc *ss_arc;
433 int ss_deleted;
434 int ss_finish;
435 int ss_rename;
436} ss_state;
437
438typedef struct _ss_dfa {
439 int sd_nstates;
440 ss_state *sd_state;
441} ss_dfa;
442
Guido van Rossum71f477c1991-04-03 19:09:02 +0000443/* Forward */
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000444static void printssdfa PROTO((int xx_nstates, ss_state *xx_state, int nbits,
445 labellist *ll, char *msg));
446static void simplify PROTO((int xx_nstates, ss_state *xx_state));
447static void convert PROTO((dfa *d, int xx_nstates, ss_state *xx_state));
Guido van Rossum71f477c1991-04-03 19:09:02 +0000448
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000449static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450makedfa(gr, nf, d)
451 nfagrammar *gr;
452 nfa *nf;
453 dfa *d;
454{
455 int nbits = nf->nf_nstates;
456 bitset ss;
457 int xx_nstates;
458 ss_state *xx_state, *yy;
459 ss_arc *zz;
460 int istate, jstate, iarc, jarc, ibit;
461 nfastate *st;
462 nfaarc *ar;
463
464 ss = newbitset(nbits);
465 addclosure(ss, nf, nf->nf_start);
466 xx_state = NEW(ss_state, 1);
467 if (xx_state == NULL)
468 fatal("no mem for xx_state in makedfa");
469 xx_nstates = 1;
470 yy = &xx_state[0];
471 yy->ss_ss = ss;
472 yy->ss_narcs = 0;
473 yy->ss_arc = NULL;
474 yy->ss_deleted = 0;
475 yy->ss_finish = testbit(ss, nf->nf_finish);
476 if (yy->ss_finish)
477 printf("Error: nonterminal '%s' may produce empty.\n",
478 nf->nf_name);
479
480 /* This algorithm is from a book written before
481 the invention of structured programming... */
482
483 /* For each unmarked state... */
484 for (istate = 0; istate < xx_nstates; ++istate) {
485 yy = &xx_state[istate];
486 ss = yy->ss_ss;
487 /* For all its states... */
488 for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
489 if (!testbit(ss, ibit))
490 continue;
491 st = &nf->nf_state[ibit];
492 /* For all non-empty arcs from this state... */
493 for (iarc = 0; iarc < st->st_narcs; iarc++) {
494 ar = &st->st_arc[iarc];
495 if (ar->ar_label == EMPTY)
496 continue;
497 /* Look up in list of arcs from this state */
498 for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
499 zz = &yy->ss_arc[jarc];
500 if (ar->ar_label == zz->sa_label)
501 goto found;
502 }
503 /* Add new arc for this state */
504 RESIZE(yy->ss_arc, ss_arc, yy->ss_narcs + 1);
505 if (yy->ss_arc == NULL)
506 fatal("out of mem");
507 zz = &yy->ss_arc[yy->ss_narcs++];
508 zz->sa_label = ar->ar_label;
509 zz->sa_bitset = newbitset(nbits);
510 zz->sa_arrow = -1;
511 found: ;
512 /* Add destination */
513 addclosure(zz->sa_bitset, nf, ar->ar_arrow);
514 }
515 }
516 /* Now look up all the arrow states */
517 for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
518 zz = &xx_state[istate].ss_arc[jarc];
519 for (jstate = 0; jstate < xx_nstates; jstate++) {
520 if (samebitset(zz->sa_bitset,
521 xx_state[jstate].ss_ss, nbits)) {
522 zz->sa_arrow = jstate;
523 goto done;
524 }
525 }
526 RESIZE(xx_state, ss_state, xx_nstates + 1);
527 if (xx_state == NULL)
528 fatal("out of mem");
529 zz->sa_arrow = xx_nstates;
530 yy = &xx_state[xx_nstates++];
531 yy->ss_ss = zz->sa_bitset;
532 yy->ss_narcs = 0;
533 yy->ss_arc = NULL;
534 yy->ss_deleted = 0;
535 yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
536 done: ;
537 }
538 }
539
540 if (debugging)
541 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
542 "before minimizing");
543
544 simplify(xx_nstates, xx_state);
545
546 if (debugging)
547 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
548 "after minimizing");
549
550 convert(d, xx_nstates, xx_state);
551
552 /* XXX cleanup */
553}
554
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000555static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000556printssdfa(xx_nstates, xx_state, nbits, ll, msg)
557 int xx_nstates;
558 ss_state *xx_state;
559 int nbits;
560 labellist *ll;
561 char *msg;
562{
563 int i, ibit, iarc;
564 ss_state *yy;
565 ss_arc *zz;
566
567 printf("Subset DFA %s\n", msg);
568 for (i = 0; i < xx_nstates; i++) {
569 yy = &xx_state[i];
570 if (yy->ss_deleted)
571 continue;
572 printf(" Subset %d", i);
573 if (yy->ss_finish)
574 printf(" (finish)");
575 printf(" { ");
576 for (ibit = 0; ibit < nbits; ibit++) {
577 if (testbit(yy->ss_ss, ibit))
578 printf("%d ", ibit);
579 }
580 printf("}\n");
581 for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
582 zz = &yy->ss_arc[iarc];
583 printf(" Arc to state %d, label %s\n",
584 zz->sa_arrow,
585 labelrepr(&ll->ll_label[zz->sa_label]));
586 }
587 }
588}
589
590
591/* PART THREE -- SIMPLIFY DFA */
592
593/* Simplify the DFA by repeatedly eliminating states that are
594 equivalent to another oner. This is NOT Algorithm 3.3 from
595 [Aho&Ullman 77]. It does not always finds the minimal DFA,
596 but it does usually make a much smaller one... (For an example
597 of sub-optimal behaviour, try S: x a b+ | y a b+.)
598*/
599
600static int
601samestate(s1, s2)
602 ss_state *s1, *s2;
603{
604 int i;
605
606 if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
607 return 0;
608 for (i = 0; i < s1->ss_narcs; i++) {
609 if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
610 s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
611 return 0;
612 }
613 return 1;
614}
615
616static void
617renamestates(xx_nstates, xx_state, from, to)
618 int xx_nstates;
619 ss_state *xx_state;
620 int from, to;
621{
622 int i, j;
623
624 if (debugging)
625 printf("Rename state %d to %d.\n", from, to);
626 for (i = 0; i < xx_nstates; i++) {
627 if (xx_state[i].ss_deleted)
628 continue;
629 for (j = 0; j < xx_state[i].ss_narcs; j++) {
630 if (xx_state[i].ss_arc[j].sa_arrow == from)
631 xx_state[i].ss_arc[j].sa_arrow = to;
632 }
633 }
634}
635
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000636static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000637simplify(xx_nstates, xx_state)
638 int xx_nstates;
639 ss_state *xx_state;
640{
641 int changes;
Guido van Rossum588633d1994-12-30 15:46:02 +0000642 int i, j;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000643
644 do {
645 changes = 0;
646 for (i = 1; i < xx_nstates; i++) {
647 if (xx_state[i].ss_deleted)
648 continue;
649 for (j = 0; j < i; j++) {
650 if (xx_state[j].ss_deleted)
651 continue;
652 if (samestate(&xx_state[i], &xx_state[j])) {
653 xx_state[i].ss_deleted++;
654 renamestates(xx_nstates, xx_state, i, j);
655 changes++;
656 break;
657 }
658 }
659 }
660 } while (changes);
661}
662
663
664/* PART FOUR -- GENERATE PARSING TABLES */
665
666/* Convert the DFA into a grammar that can be used by our parser */
667
Guido van Rossumfd8a3931996-12-02 18:27:33 +0000668static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669convert(d, xx_nstates, xx_state)
670 dfa *d;
671 int xx_nstates;
672 ss_state *xx_state;
673{
674 int i, j;
675 ss_state *yy;
676 ss_arc *zz;
677
678 for (i = 0; i < xx_nstates; i++) {
679 yy = &xx_state[i];
680 if (yy->ss_deleted)
681 continue;
682 yy->ss_rename = addstate(d);
683 }
684
685 for (i = 0; i < xx_nstates; i++) {
686 yy = &xx_state[i];
687 if (yy->ss_deleted)
688 continue;
689 for (j = 0; j < yy->ss_narcs; j++) {
690 zz = &yy->ss_arc[j];
691 addarc(d, yy->ss_rename,
692 xx_state[zz->sa_arrow].ss_rename,
693 zz->sa_label);
694 }
695 if (yy->ss_finish)
696 addarc(d, yy->ss_rename, yy->ss_rename, 0);
697 }
698
699 d->d_initial = 0;
700}
701
702
703/* PART FIVE -- GLUE IT ALL TOGETHER */
704
705static grammar *
706maketables(gr)
707 nfagrammar *gr;
708{
709 int i;
710 nfa *nf;
711 dfa *d;
712 grammar *g;
713
714 if (gr->gr_nnfas == 0)
715 return NULL;
716 g = newgrammar(gr->gr_nfa[0]->nf_type);
717 /* XXX first rule must be start rule */
718 g->g_ll = gr->gr_ll;
719
720 for (i = 0; i < gr->gr_nnfas; i++) {
721 nf = gr->gr_nfa[i];
722 if (debugging) {
723 printf("Dump of NFA for '%s' ...\n", nf->nf_name);
724 dumpnfa(&gr->gr_ll, nf);
725 }
726 printf("Making DFA for '%s' ...\n", nf->nf_name);
727 d = adddfa(g, nf->nf_type, nf->nf_name);
728 makedfa(gr, gr->gr_nfa[i], d);
729 }
730
731 return g;
732}
733
734grammar *
735pgen(n)
736 node *n;
737{
738 nfagrammar *gr;
739 grammar *g;
740
741 gr = metacompile(n);
742 g = maketables(gr);
743 translatelabels(g);
744 addfirstsets(g);
745 return g;
746}
747
748
749/*
750
751Description
752-----------
753
754Input is a grammar in extended BNF (using * for repetition, + for
755at-least-once repetition, [] for optional parts, | for alternatives and
756() for grouping). This has already been parsed and turned into a parse
757tree.
758
759Each rule is considered as a regular expression in its own right.
760It is turned into a Non-deterministic Finite Automaton (NFA), which
761is then turned into a Deterministic Finite Automaton (DFA), which is then
762optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3,
763or similar compiler books (this technique is more often used for lexical
764analyzers).
765
766The DFA's are used by the parser as parsing tables in a special way
767that's probably unique. Before they are usable, the FIRST sets of all
768non-terminals are computed.
769
770Reference
771---------
772
773[Aho&Ullman 77]
774 Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
775 (first edition)
776
777*/