blob: dff4aed704282814e7685063f3c6d203f992b3a8 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumb9f8d6e1995-01-04 19:08:09 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* Parser generator */
Guido van Rossum3f5da241990-12-20 15:06:42 +000033/* XXX This file is not yet fully PROTOized */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000034
35/* For a description, see the comments at end of this file */
36
Guido van Rossum3f5da241990-12-20 15:06:42 +000037#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038#include "assert.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039#include "token.h"
40#include "node.h"
41#include "grammar.h"
42#include "metagrammar.h"
43#include "pgen.h"
44
45extern int debugging;
46
47
48/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
49
50typedef struct _nfaarc {
51 int ar_label;
52 int ar_arrow;
53} nfaarc;
54
55typedef struct _nfastate {
56 int st_narcs;
57 nfaarc *st_arc;
58} nfastate;
59
60typedef struct _nfa {
61 int nf_type;
62 char *nf_name;
63 int nf_nstates;
64 nfastate *nf_state;
65 int nf_start, nf_finish;
66} nfa;
67
Guido van Rossum71f477c1991-04-03 19:09:02 +000068/* Forward */
69static compile_rhs PROTO((labellist *ll, nfa *nf, node *n, int *pa, int *pb));
70static compile_alt PROTO((labellist *ll, nfa *nf, node *n, int *pa, int *pb));
71static compile_item PROTO((labellist *ll, nfa *nf, node *n, int *pa, int *pb));
72static compile_atom PROTO((labellist *ll, nfa *nf, node *n, int *pa, int *pb));
73
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000074static int
75addnfastate(nf)
76 nfa *nf;
77{
78 nfastate *st;
79
80 RESIZE(nf->nf_state, nfastate, nf->nf_nstates + 1);
81 if (nf->nf_state == NULL)
82 fatal("out of mem");
83 st = &nf->nf_state[nf->nf_nstates++];
84 st->st_narcs = 0;
85 st->st_arc = NULL;
86 return st - nf->nf_state;
87}
88
89static void
90addnfaarc(nf, from, to, lbl)
91 nfa *nf;
92 int from, to, lbl;
93{
94 nfastate *st;
95 nfaarc *ar;
96
97 st = &nf->nf_state[from];
98 RESIZE(st->st_arc, nfaarc, st->st_narcs + 1);
99 if (st->st_arc == NULL)
100 fatal("out of mem");
101 ar = &st->st_arc[st->st_narcs++];
102 ar->ar_label = lbl;
103 ar->ar_arrow = to;
104}
105
106static nfa *
107newnfa(name)
108 char *name;
109{
110 nfa *nf;
111 static type = NT_OFFSET; /* All types will be disjunct */
112
113 nf = NEW(nfa, 1);
114 if (nf == NULL)
115 fatal("no mem for new nfa");
116 nf->nf_type = type++;
117 nf->nf_name = name; /* XXX strdup(name) ??? */
118 nf->nf_nstates = 0;
119 nf->nf_state = NULL;
120 nf->nf_start = nf->nf_finish = -1;
121 return nf;
122}
123
124typedef struct _nfagrammar {
125 int gr_nnfas;
126 nfa **gr_nfa;
127 labellist gr_ll;
128} nfagrammar;
129
Guido van Rossum71f477c1991-04-03 19:09:02 +0000130/* Forward */
131static compile_rule PROTO((nfagrammar *gr, node *n));
132
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000133static nfagrammar *
134newnfagrammar()
135{
136 nfagrammar *gr;
137
138 gr = NEW(nfagrammar, 1);
139 if (gr == NULL)
140 fatal("no mem for new nfa grammar");
141 gr->gr_nnfas = 0;
142 gr->gr_nfa = NULL;
143 gr->gr_ll.ll_nlabels = 0;
144 gr->gr_ll.ll_label = NULL;
145 addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
146 return gr;
147}
148
149static nfa *
150addnfa(gr, name)
151 nfagrammar *gr;
152 char *name;
153{
154 nfa *nf;
155
156 nf = newnfa(name);
157 RESIZE(gr->gr_nfa, nfa *, gr->gr_nnfas + 1);
158 if (gr->gr_nfa == NULL)
159 fatal("out of mem");
160 gr->gr_nfa[gr->gr_nnfas++] = nf;
161 addlabel(&gr->gr_ll, NAME, nf->nf_name);
162 return nf;
163}
164
165#ifdef DEBUG
166
167static char REQNFMT[] = "metacompile: less than %d children\n";
168
169#define REQN(i, count) \
170 if (i < count) { \
171 fprintf(stderr, REQNFMT, count); \
Guido van Rossum588633d1994-12-30 15:46:02 +0000172 fatal("REQN"); \
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000173 } else
174
175#else
176#define REQN(i, count) /* empty */
177#endif
178
179static nfagrammar *
180metacompile(n)
181 node *n;
182{
183 nfagrammar *gr;
184 int i;
185
186 printf("Compiling (meta-) parse tree into NFA grammar\n");
187 gr = newnfagrammar();
188 REQ(n, MSTART);
189 i = n->n_nchildren - 1; /* Last child is ENDMARKER */
190 n = n->n_child;
191 for (; --i >= 0; n++) {
192 if (n->n_type != NEWLINE)
193 compile_rule(gr, n);
194 }
195 return gr;
196}
197
198static
199compile_rule(gr, n)
200 nfagrammar *gr;
201 node *n;
202{
203 nfa *nf;
204
205 REQ(n, RULE);
206 REQN(n->n_nchildren, 4);
207 n = n->n_child;
208 REQ(n, NAME);
209 nf = addnfa(gr, n->n_str);
210 n++;
211 REQ(n, COLON);
212 n++;
213 REQ(n, RHS);
214 compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
215 n++;
216 REQ(n, NEWLINE);
217}
218
219static
220compile_rhs(ll, nf, n, pa, pb)
221 labellist *ll;
222 nfa *nf;
223 node *n;
224 int *pa, *pb;
225{
226 int i;
227 int a, b;
228
229 REQ(n, RHS);
230 i = n->n_nchildren;
231 REQN(i, 1);
232 n = n->n_child;
233 REQ(n, ALT);
234 compile_alt(ll, nf, n, pa, pb);
235 if (--i <= 0)
236 return;
237 n++;
238 a = *pa;
239 b = *pb;
240 *pa = addnfastate(nf);
241 *pb = addnfastate(nf);
242 addnfaarc(nf, *pa, a, EMPTY);
243 addnfaarc(nf, b, *pb, EMPTY);
244 for (; --i >= 0; n++) {
245 REQ(n, VBAR);
246 REQN(i, 1);
247 --i;
248 n++;
249 REQ(n, ALT);
250 compile_alt(ll, nf, n, &a, &b);
251 addnfaarc(nf, *pa, a, EMPTY);
252 addnfaarc(nf, b, *pb, EMPTY);
253 }
254}
255
256static
257compile_alt(ll, nf, n, pa, pb)
258 labellist *ll;
259 nfa *nf;
260 node *n;
261 int *pa, *pb;
262{
263 int i;
264 int a, b;
265
266 REQ(n, ALT);
267 i = n->n_nchildren;
268 REQN(i, 1);
269 n = n->n_child;
270 REQ(n, ITEM);
271 compile_item(ll, nf, n, pa, pb);
272 --i;
273 n++;
274 for (; --i >= 0; n++) {
275 if (n->n_type == COMMA) { /* XXX Temporary */
276 REQN(i, 1);
277 --i;
278 n++;
279 }
280 REQ(n, ITEM);
281 compile_item(ll, nf, n, &a, &b);
282 addnfaarc(nf, *pb, a, EMPTY);
283 *pb = b;
284 }
285}
286
287static
288compile_item(ll, nf, n, pa, pb)
289 labellist *ll;
290 nfa *nf;
291 node *n;
292 int *pa, *pb;
293{
294 int i;
295 int a, b;
296
297 REQ(n, ITEM);
298 i = n->n_nchildren;
299 REQN(i, 1);
300 n = n->n_child;
301 if (n->n_type == LSQB) {
302 REQN(i, 3);
303 n++;
304 REQ(n, RHS);
305 *pa = addnfastate(nf);
306 *pb = addnfastate(nf);
307 addnfaarc(nf, *pa, *pb, EMPTY);
308 compile_rhs(ll, nf, n, &a, &b);
309 addnfaarc(nf, *pa, a, EMPTY);
310 addnfaarc(nf, b, *pb, EMPTY);
311 REQN(i, 1);
312 n++;
313 REQ(n, RSQB);
314 }
315 else {
316 compile_atom(ll, nf, n, pa, pb);
317 if (--i <= 0)
318 return;
319 n++;
320 addnfaarc(nf, *pb, *pa, EMPTY);
321 if (n->n_type == STAR)
322 *pb = *pa;
323 else
324 REQ(n, PLUS);
325 }
326}
327
328static
329compile_atom(ll, nf, n, pa, pb)
330 labellist *ll;
331 nfa *nf;
332 node *n;
333 int *pa, *pb;
334{
335 int i;
336
337 REQ(n, ATOM);
338 i = n->n_nchildren;
339 REQN(i, 1);
340 n = n->n_child;
341 if (n->n_type == LPAR) {
342 REQN(i, 3);
343 n++;
344 REQ(n, RHS);
345 compile_rhs(ll, nf, n, pa, pb);
346 n++;
347 REQ(n, RPAR);
348 }
349 else if (n->n_type == NAME || n->n_type == STRING) {
350 *pa = addnfastate(nf);
351 *pb = addnfastate(nf);
352 addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
353 }
354 else
355 REQ(n, NAME);
356}
357
358static void
359dumpstate(ll, nf, istate)
360 labellist *ll;
361 nfa *nf;
362 int istate;
363{
364 nfastate *st;
365 int i;
366 nfaarc *ar;
367
368 printf("%c%2d%c",
369 istate == nf->nf_start ? '*' : ' ',
370 istate,
371 istate == nf->nf_finish ? '.' : ' ');
372 st = &nf->nf_state[istate];
373 ar = st->st_arc;
374 for (i = 0; i < st->st_narcs; i++) {
375 if (i > 0)
376 printf("\n ");
377 printf("-> %2d %s", ar->ar_arrow,
378 labelrepr(&ll->ll_label[ar->ar_label]));
379 ar++;
380 }
381 printf("\n");
382}
383
384static void
385dumpnfa(ll, nf)
386 labellist *ll;
387 nfa *nf;
388{
389 int i;
390
391 printf("NFA '%s' has %d states; start %d, finish %d\n",
392 nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
393 for (i = 0; i < nf->nf_nstates; i++)
394 dumpstate(ll, nf, i);
395}
396
397
398/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
399
Guido van Rossum588633d1994-12-30 15:46:02 +0000400static void
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000401addclosure(ss, nf, istate)
402 bitset ss;
403 nfa *nf;
404 int istate;
405{
406 if (addbit(ss, istate)) {
407 nfastate *st = &nf->nf_state[istate];
408 nfaarc *ar = st->st_arc;
409 int i;
410
411 for (i = st->st_narcs; --i >= 0; ) {
412 if (ar->ar_label == EMPTY)
413 addclosure(ss, nf, ar->ar_arrow);
414 ar++;
415 }
416 }
417}
418
419typedef struct _ss_arc {
420 bitset sa_bitset;
421 int sa_arrow;
422 int sa_label;
423} ss_arc;
424
425typedef struct _ss_state {
426 bitset ss_ss;
427 int ss_narcs;
428 ss_arc *ss_arc;
429 int ss_deleted;
430 int ss_finish;
431 int ss_rename;
432} ss_state;
433
434typedef struct _ss_dfa {
435 int sd_nstates;
436 ss_state *sd_state;
437} ss_dfa;
438
Guido van Rossum71f477c1991-04-03 19:09:02 +0000439/* Forward */
440static printssdfa PROTO((int xx_nstates, ss_state *xx_state, int nbits,
441 labellist *ll, char *msg));
442static simplify PROTO((int xx_nstates, ss_state *xx_state));
443static convert PROTO((dfa *d, int xx_nstates, ss_state *xx_state));
444
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000445static
446makedfa(gr, nf, d)
447 nfagrammar *gr;
448 nfa *nf;
449 dfa *d;
450{
451 int nbits = nf->nf_nstates;
452 bitset ss;
453 int xx_nstates;
454 ss_state *xx_state, *yy;
455 ss_arc *zz;
456 int istate, jstate, iarc, jarc, ibit;
457 nfastate *st;
458 nfaarc *ar;
459
460 ss = newbitset(nbits);
461 addclosure(ss, nf, nf->nf_start);
462 xx_state = NEW(ss_state, 1);
463 if (xx_state == NULL)
464 fatal("no mem for xx_state in makedfa");
465 xx_nstates = 1;
466 yy = &xx_state[0];
467 yy->ss_ss = ss;
468 yy->ss_narcs = 0;
469 yy->ss_arc = NULL;
470 yy->ss_deleted = 0;
471 yy->ss_finish = testbit(ss, nf->nf_finish);
472 if (yy->ss_finish)
473 printf("Error: nonterminal '%s' may produce empty.\n",
474 nf->nf_name);
475
476 /* This algorithm is from a book written before
477 the invention of structured programming... */
478
479 /* For each unmarked state... */
480 for (istate = 0; istate < xx_nstates; ++istate) {
481 yy = &xx_state[istate];
482 ss = yy->ss_ss;
483 /* For all its states... */
484 for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
485 if (!testbit(ss, ibit))
486 continue;
487 st = &nf->nf_state[ibit];
488 /* For all non-empty arcs from this state... */
489 for (iarc = 0; iarc < st->st_narcs; iarc++) {
490 ar = &st->st_arc[iarc];
491 if (ar->ar_label == EMPTY)
492 continue;
493 /* Look up in list of arcs from this state */
494 for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
495 zz = &yy->ss_arc[jarc];
496 if (ar->ar_label == zz->sa_label)
497 goto found;
498 }
499 /* Add new arc for this state */
500 RESIZE(yy->ss_arc, ss_arc, yy->ss_narcs + 1);
501 if (yy->ss_arc == NULL)
502 fatal("out of mem");
503 zz = &yy->ss_arc[yy->ss_narcs++];
504 zz->sa_label = ar->ar_label;
505 zz->sa_bitset = newbitset(nbits);
506 zz->sa_arrow = -1;
507 found: ;
508 /* Add destination */
509 addclosure(zz->sa_bitset, nf, ar->ar_arrow);
510 }
511 }
512 /* Now look up all the arrow states */
513 for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
514 zz = &xx_state[istate].ss_arc[jarc];
515 for (jstate = 0; jstate < xx_nstates; jstate++) {
516 if (samebitset(zz->sa_bitset,
517 xx_state[jstate].ss_ss, nbits)) {
518 zz->sa_arrow = jstate;
519 goto done;
520 }
521 }
522 RESIZE(xx_state, ss_state, xx_nstates + 1);
523 if (xx_state == NULL)
524 fatal("out of mem");
525 zz->sa_arrow = xx_nstates;
526 yy = &xx_state[xx_nstates++];
527 yy->ss_ss = zz->sa_bitset;
528 yy->ss_narcs = 0;
529 yy->ss_arc = NULL;
530 yy->ss_deleted = 0;
531 yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
532 done: ;
533 }
534 }
535
536 if (debugging)
537 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
538 "before minimizing");
539
540 simplify(xx_nstates, xx_state);
541
542 if (debugging)
543 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
544 "after minimizing");
545
546 convert(d, xx_nstates, xx_state);
547
548 /* XXX cleanup */
549}
550
551static
552printssdfa(xx_nstates, xx_state, nbits, ll, msg)
553 int xx_nstates;
554 ss_state *xx_state;
555 int nbits;
556 labellist *ll;
557 char *msg;
558{
559 int i, ibit, iarc;
560 ss_state *yy;
561 ss_arc *zz;
562
563 printf("Subset DFA %s\n", msg);
564 for (i = 0; i < xx_nstates; i++) {
565 yy = &xx_state[i];
566 if (yy->ss_deleted)
567 continue;
568 printf(" Subset %d", i);
569 if (yy->ss_finish)
570 printf(" (finish)");
571 printf(" { ");
572 for (ibit = 0; ibit < nbits; ibit++) {
573 if (testbit(yy->ss_ss, ibit))
574 printf("%d ", ibit);
575 }
576 printf("}\n");
577 for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
578 zz = &yy->ss_arc[iarc];
579 printf(" Arc to state %d, label %s\n",
580 zz->sa_arrow,
581 labelrepr(&ll->ll_label[zz->sa_label]));
582 }
583 }
584}
585
586
587/* PART THREE -- SIMPLIFY DFA */
588
589/* Simplify the DFA by repeatedly eliminating states that are
590 equivalent to another oner. This is NOT Algorithm 3.3 from
591 [Aho&Ullman 77]. It does not always finds the minimal DFA,
592 but it does usually make a much smaller one... (For an example
593 of sub-optimal behaviour, try S: x a b+ | y a b+.)
594*/
595
596static int
597samestate(s1, s2)
598 ss_state *s1, *s2;
599{
600 int i;
601
602 if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
603 return 0;
604 for (i = 0; i < s1->ss_narcs; i++) {
605 if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
606 s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
607 return 0;
608 }
609 return 1;
610}
611
612static void
613renamestates(xx_nstates, xx_state, from, to)
614 int xx_nstates;
615 ss_state *xx_state;
616 int from, to;
617{
618 int i, j;
619
620 if (debugging)
621 printf("Rename state %d to %d.\n", from, to);
622 for (i = 0; i < xx_nstates; i++) {
623 if (xx_state[i].ss_deleted)
624 continue;
625 for (j = 0; j < xx_state[i].ss_narcs; j++) {
626 if (xx_state[i].ss_arc[j].sa_arrow == from)
627 xx_state[i].ss_arc[j].sa_arrow = to;
628 }
629 }
630}
631
632static
633simplify(xx_nstates, xx_state)
634 int xx_nstates;
635 ss_state *xx_state;
636{
637 int changes;
Guido van Rossum588633d1994-12-30 15:46:02 +0000638 int i, j;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639
640 do {
641 changes = 0;
642 for (i = 1; i < xx_nstates; i++) {
643 if (xx_state[i].ss_deleted)
644 continue;
645 for (j = 0; j < i; j++) {
646 if (xx_state[j].ss_deleted)
647 continue;
648 if (samestate(&xx_state[i], &xx_state[j])) {
649 xx_state[i].ss_deleted++;
650 renamestates(xx_nstates, xx_state, i, j);
651 changes++;
652 break;
653 }
654 }
655 }
656 } while (changes);
657}
658
659
660/* PART FOUR -- GENERATE PARSING TABLES */
661
662/* Convert the DFA into a grammar that can be used by our parser */
663
664static
665convert(d, xx_nstates, xx_state)
666 dfa *d;
667 int xx_nstates;
668 ss_state *xx_state;
669{
670 int i, j;
671 ss_state *yy;
672 ss_arc *zz;
673
674 for (i = 0; i < xx_nstates; i++) {
675 yy = &xx_state[i];
676 if (yy->ss_deleted)
677 continue;
678 yy->ss_rename = addstate(d);
679 }
680
681 for (i = 0; i < xx_nstates; i++) {
682 yy = &xx_state[i];
683 if (yy->ss_deleted)
684 continue;
685 for (j = 0; j < yy->ss_narcs; j++) {
686 zz = &yy->ss_arc[j];
687 addarc(d, yy->ss_rename,
688 xx_state[zz->sa_arrow].ss_rename,
689 zz->sa_label);
690 }
691 if (yy->ss_finish)
692 addarc(d, yy->ss_rename, yy->ss_rename, 0);
693 }
694
695 d->d_initial = 0;
696}
697
698
699/* PART FIVE -- GLUE IT ALL TOGETHER */
700
701static grammar *
702maketables(gr)
703 nfagrammar *gr;
704{
705 int i;
706 nfa *nf;
707 dfa *d;
708 grammar *g;
709
710 if (gr->gr_nnfas == 0)
711 return NULL;
712 g = newgrammar(gr->gr_nfa[0]->nf_type);
713 /* XXX first rule must be start rule */
714 g->g_ll = gr->gr_ll;
715
716 for (i = 0; i < gr->gr_nnfas; i++) {
717 nf = gr->gr_nfa[i];
718 if (debugging) {
719 printf("Dump of NFA for '%s' ...\n", nf->nf_name);
720 dumpnfa(&gr->gr_ll, nf);
721 }
722 printf("Making DFA for '%s' ...\n", nf->nf_name);
723 d = adddfa(g, nf->nf_type, nf->nf_name);
724 makedfa(gr, gr->gr_nfa[i], d);
725 }
726
727 return g;
728}
729
730grammar *
731pgen(n)
732 node *n;
733{
734 nfagrammar *gr;
735 grammar *g;
736
737 gr = metacompile(n);
738 g = maketables(gr);
739 translatelabels(g);
740 addfirstsets(g);
741 return g;
742}
743
744
745/*
746
747Description
748-----------
749
750Input is a grammar in extended BNF (using * for repetition, + for
751at-least-once repetition, [] for optional parts, | for alternatives and
752() for grouping). This has already been parsed and turned into a parse
753tree.
754
755Each rule is considered as a regular expression in its own right.
756It is turned into a Non-deterministic Finite Automaton (NFA), which
757is then turned into a Deterministic Finite Automaton (DFA), which is then
758optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3,
759or similar compiler books (this technique is more often used for lexical
760analyzers).
761
762The DFA's are used by the parser as parsing tables in a special way
763that's probably unique. Before they are usable, the FIRST sets of all
764non-terminals are computed.
765
766Reference
767---------
768
769[Aho&Ullman 77]
770 Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
771 (first edition)
772
773*/