| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 1 | /* Parser generator */ | 
|  | 2 |  | 
|  | 3 | /* For a description, see the comments at end of this file */ | 
|  | 4 |  | 
| Tim Peters | 1ca1296 | 2001-12-04 03:18:48 +0000 | [diff] [blame] | 5 | #include "Python.h" | 
| Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 6 | #include "pgenheaders.h" | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 7 | #include "token.h" | 
|  | 8 | #include "node.h" | 
|  | 9 | #include "grammar.h" | 
|  | 10 | #include "metagrammar.h" | 
|  | 11 | #include "pgen.h" | 
|  | 12 |  | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 13 | extern int Py_DebugFlag; | 
| Neal Norwitz | 5352d8c | 2002-05-31 13:11:40 +0000 | [diff] [blame] | 14 | extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */ | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 15 |  | 
|  | 16 |  | 
|  | 17 | /* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */ | 
|  | 18 |  | 
|  | 19 | typedef struct _nfaarc { | 
|  | 20 | int	ar_label; | 
|  | 21 | int	ar_arrow; | 
|  | 22 | } nfaarc; | 
|  | 23 |  | 
|  | 24 | typedef struct _nfastate { | 
|  | 25 | int	st_narcs; | 
|  | 26 | nfaarc	*st_arc; | 
|  | 27 | } nfastate; | 
|  | 28 |  | 
|  | 29 | typedef struct _nfa { | 
|  | 30 | int		nf_type; | 
|  | 31 | char		*nf_name; | 
|  | 32 | int		nf_nstates; | 
|  | 33 | nfastate	*nf_state; | 
|  | 34 | int		nf_start, nf_finish; | 
|  | 35 | } nfa; | 
|  | 36 |  | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 37 | /* Forward */ | 
| Tim Peters | dbd9ba6 | 2000-07-09 03:09:57 +0000 | [diff] [blame] | 38 | static void compile_rhs(labellist *ll, | 
|  | 39 | nfa *nf, node *n, int *pa, int *pb); | 
|  | 40 | static void compile_alt(labellist *ll, | 
|  | 41 | nfa *nf, node *n, int *pa, int *pb); | 
|  | 42 | static void compile_item(labellist *ll, | 
|  | 43 | nfa *nf, node *n, int *pa, int *pb); | 
|  | 44 | static void compile_atom(labellist *ll, | 
|  | 45 | nfa *nf, node *n, int *pa, int *pb); | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 46 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 47 | static int | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 48 | addnfastate(nfa *nf) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 49 | { | 
|  | 50 | nfastate *st; | 
|  | 51 |  | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 52 | nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, | 
|  | 53 | sizeof(nfastate) * (nf->nf_nstates + 1)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 54 | if (nf->nf_state == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 55 | Py_FatalError("out of mem"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 56 | st = &nf->nf_state[nf->nf_nstates++]; | 
|  | 57 | st->st_narcs = 0; | 
|  | 58 | st->st_arc = NULL; | 
|  | 59 | return st - nf->nf_state; | 
|  | 60 | } | 
|  | 61 |  | 
|  | 62 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 63 | addnfaarc(nfa *nf, int from, int to, int lbl) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 64 | { | 
|  | 65 | nfastate *st; | 
|  | 66 | nfaarc *ar; | 
|  | 67 |  | 
|  | 68 | st = &nf->nf_state[from]; | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 69 | st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc, | 
|  | 70 | sizeof(nfaarc) * (st->st_narcs + 1)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 71 | if (st->st_arc == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 72 | Py_FatalError("out of mem"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 73 | ar = &st->st_arc[st->st_narcs++]; | 
|  | 74 | ar->ar_label = lbl; | 
|  | 75 | ar->ar_arrow = to; | 
|  | 76 | } | 
|  | 77 |  | 
|  | 78 | static nfa * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 79 | newnfa(char *name) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 80 | { | 
|  | 81 | nfa *nf; | 
| Guido van Rossum | bb3649e | 1998-04-10 22:09:39 +0000 | [diff] [blame] | 82 | static int type = NT_OFFSET; /* All types will be disjunct */ | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 83 |  | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 84 | nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 85 | if (nf == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 86 | Py_FatalError("no mem for new nfa"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 87 | nf->nf_type = type++; | 
|  | 88 | nf->nf_name = name; /* XXX strdup(name) ??? */ | 
|  | 89 | nf->nf_nstates = 0; | 
|  | 90 | nf->nf_state = NULL; | 
|  | 91 | nf->nf_start = nf->nf_finish = -1; | 
|  | 92 | return nf; | 
|  | 93 | } | 
|  | 94 |  | 
|  | 95 | typedef struct _nfagrammar { | 
|  | 96 | int		gr_nnfas; | 
|  | 97 | nfa		**gr_nfa; | 
|  | 98 | labellist	gr_ll; | 
|  | 99 | } nfagrammar; | 
|  | 100 |  | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 101 | /* Forward */ | 
| Tim Peters | dbd9ba6 | 2000-07-09 03:09:57 +0000 | [diff] [blame] | 102 | static void compile_rule(nfagrammar *gr, node *n); | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 103 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 104 | static nfagrammar * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 105 | newnfagrammar(void) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 106 | { | 
|  | 107 | nfagrammar *gr; | 
|  | 108 |  | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 109 | gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 110 | if (gr == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 111 | Py_FatalError("no mem for new nfa grammar"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 112 | gr->gr_nnfas = 0; | 
|  | 113 | gr->gr_nfa = NULL; | 
|  | 114 | gr->gr_ll.ll_nlabels = 0; | 
|  | 115 | gr->gr_ll.ll_label = NULL; | 
|  | 116 | addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); | 
|  | 117 | return gr; | 
|  | 118 | } | 
|  | 119 |  | 
|  | 120 | static nfa * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 121 | addnfa(nfagrammar *gr, char *name) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 122 | { | 
|  | 123 | nfa *nf; | 
|  | 124 |  | 
|  | 125 | nf = newnfa(name); | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 126 | gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, | 
| Guido van Rossum | 8ce8a78 | 2007-11-01 19:42:39 +0000 | [diff] [blame] | 127 | sizeof(nfa*) * (gr->gr_nnfas + 1)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 128 | if (gr->gr_nfa == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 129 | Py_FatalError("out of mem"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 130 | gr->gr_nfa[gr->gr_nnfas++] = nf; | 
|  | 131 | addlabel(&gr->gr_ll, NAME, nf->nf_name); | 
|  | 132 | return nf; | 
|  | 133 | } | 
|  | 134 |  | 
| Guido van Rossum | 408027e | 1996-12-30 16:17:54 +0000 | [diff] [blame] | 135 | #ifdef Py_DEBUG | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 136 |  | 
|  | 137 | static char REQNFMT[] = "metacompile: less than %d children\n"; | 
|  | 138 |  | 
|  | 139 | #define REQN(i, count) \ | 
|  | 140 | if (i < count) { \ | 
|  | 141 | fprintf(stderr, REQNFMT, count); \ | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 142 | Py_FatalError("REQN"); \ | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 143 | } else | 
|  | 144 |  | 
|  | 145 | #else | 
|  | 146 | #define REQN(i, count)	/* empty */ | 
|  | 147 | #endif | 
|  | 148 |  | 
|  | 149 | static nfagrammar * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 150 | metacompile(node *n) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 151 | { | 
|  | 152 | nfagrammar *gr; | 
|  | 153 | int i; | 
| Guido van Rossum | 25dfe2c | 2001-09-11 16:43:16 +0000 | [diff] [blame] | 154 |  | 
|  | 155 | if (Py_DebugFlag) | 
|  | 156 | printf("Compiling (meta-) parse tree into NFA grammar\n"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 157 | gr = newnfagrammar(); | 
|  | 158 | REQ(n, MSTART); | 
|  | 159 | i = n->n_nchildren - 1; /* Last child is ENDMARKER */ | 
|  | 160 | n = n->n_child; | 
|  | 161 | for (; --i >= 0; n++) { | 
|  | 162 | if (n->n_type != NEWLINE) | 
|  | 163 | compile_rule(gr, n); | 
|  | 164 | } | 
|  | 165 | return gr; | 
|  | 166 | } | 
|  | 167 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 168 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 169 | compile_rule(nfagrammar *gr, node *n) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 170 | { | 
|  | 171 | nfa *nf; | 
|  | 172 |  | 
|  | 173 | REQ(n, RULE); | 
|  | 174 | REQN(n->n_nchildren, 4); | 
|  | 175 | n = n->n_child; | 
|  | 176 | REQ(n, NAME); | 
|  | 177 | nf = addnfa(gr, n->n_str); | 
|  | 178 | n++; | 
|  | 179 | REQ(n, COLON); | 
|  | 180 | n++; | 
|  | 181 | REQ(n, RHS); | 
|  | 182 | compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); | 
|  | 183 | n++; | 
|  | 184 | REQ(n, NEWLINE); | 
|  | 185 | } | 
|  | 186 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 187 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 188 | compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 189 | { | 
|  | 190 | int i; | 
|  | 191 | int a, b; | 
|  | 192 |  | 
|  | 193 | REQ(n, RHS); | 
|  | 194 | i = n->n_nchildren; | 
|  | 195 | REQN(i, 1); | 
|  | 196 | n = n->n_child; | 
|  | 197 | REQ(n, ALT); | 
|  | 198 | compile_alt(ll, nf, n, pa, pb); | 
|  | 199 | if (--i <= 0) | 
|  | 200 | return; | 
|  | 201 | n++; | 
|  | 202 | a = *pa; | 
|  | 203 | b = *pb; | 
|  | 204 | *pa = addnfastate(nf); | 
|  | 205 | *pb = addnfastate(nf); | 
|  | 206 | addnfaarc(nf, *pa, a, EMPTY); | 
|  | 207 | addnfaarc(nf, b, *pb, EMPTY); | 
|  | 208 | for (; --i >= 0; n++) { | 
|  | 209 | REQ(n, VBAR); | 
|  | 210 | REQN(i, 1); | 
|  | 211 | --i; | 
|  | 212 | n++; | 
|  | 213 | REQ(n, ALT); | 
|  | 214 | compile_alt(ll, nf, n, &a, &b); | 
|  | 215 | addnfaarc(nf, *pa, a, EMPTY); | 
|  | 216 | addnfaarc(nf, b, *pb, EMPTY); | 
|  | 217 | } | 
|  | 218 | } | 
|  | 219 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 220 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 221 | compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 222 | { | 
|  | 223 | int i; | 
|  | 224 | int a, b; | 
|  | 225 |  | 
|  | 226 | REQ(n, ALT); | 
|  | 227 | i = n->n_nchildren; | 
|  | 228 | REQN(i, 1); | 
|  | 229 | n = n->n_child; | 
|  | 230 | REQ(n, ITEM); | 
|  | 231 | compile_item(ll, nf, n, pa, pb); | 
|  | 232 | --i; | 
|  | 233 | n++; | 
|  | 234 | for (; --i >= 0; n++) { | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 235 | REQ(n, ITEM); | 
|  | 236 | compile_item(ll, nf, n, &a, &b); | 
|  | 237 | addnfaarc(nf, *pb, a, EMPTY); | 
|  | 238 | *pb = b; | 
|  | 239 | } | 
|  | 240 | } | 
|  | 241 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 242 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 243 | compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 244 | { | 
|  | 245 | int i; | 
|  | 246 | int a, b; | 
|  | 247 |  | 
|  | 248 | REQ(n, ITEM); | 
|  | 249 | i = n->n_nchildren; | 
|  | 250 | REQN(i, 1); | 
|  | 251 | n = n->n_child; | 
|  | 252 | if (n->n_type == LSQB) { | 
|  | 253 | REQN(i, 3); | 
|  | 254 | n++; | 
|  | 255 | REQ(n, RHS); | 
|  | 256 | *pa = addnfastate(nf); | 
|  | 257 | *pb = addnfastate(nf); | 
|  | 258 | addnfaarc(nf, *pa, *pb, EMPTY); | 
|  | 259 | compile_rhs(ll, nf, n, &a, &b); | 
|  | 260 | addnfaarc(nf, *pa, a, EMPTY); | 
|  | 261 | addnfaarc(nf, b, *pb, EMPTY); | 
|  | 262 | REQN(i, 1); | 
|  | 263 | n++; | 
|  | 264 | REQ(n, RSQB); | 
|  | 265 | } | 
|  | 266 | else { | 
|  | 267 | compile_atom(ll, nf, n, pa, pb); | 
|  | 268 | if (--i <= 0) | 
|  | 269 | return; | 
|  | 270 | n++; | 
|  | 271 | addnfaarc(nf, *pb, *pa, EMPTY); | 
|  | 272 | if (n->n_type == STAR) | 
|  | 273 | *pb = *pa; | 
|  | 274 | else | 
|  | 275 | REQ(n, PLUS); | 
|  | 276 | } | 
|  | 277 | } | 
|  | 278 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 279 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 280 | compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 281 | { | 
| Neal Norwitz | 46aae19 | 2006-01-08 02:06:01 +0000 | [diff] [blame] | 282 | int i; | 
|  | 283 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 284 | REQ(n, ATOM); | 
| Neal Norwitz | 46aae19 | 2006-01-08 02:06:01 +0000 | [diff] [blame] | 285 | i = n->n_nchildren; | 
|  | 286 | REQN(i, 1); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 287 | n = n->n_child; | 
|  | 288 | if (n->n_type == LPAR) { | 
| Neal Norwitz | 46aae19 | 2006-01-08 02:06:01 +0000 | [diff] [blame] | 289 | REQN(i, 3); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 290 | n++; | 
|  | 291 | REQ(n, RHS); | 
|  | 292 | compile_rhs(ll, nf, n, pa, pb); | 
|  | 293 | n++; | 
|  | 294 | REQ(n, RPAR); | 
|  | 295 | } | 
|  | 296 | else if (n->n_type == NAME || n->n_type == STRING) { | 
|  | 297 | *pa = addnfastate(nf); | 
|  | 298 | *pb = addnfastate(nf); | 
|  | 299 | addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); | 
|  | 300 | } | 
|  | 301 | else | 
|  | 302 | REQ(n, NAME); | 
|  | 303 | } | 
|  | 304 |  | 
|  | 305 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 306 | dumpstate(labellist *ll, nfa *nf, int istate) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 307 | { | 
|  | 308 | nfastate *st; | 
|  | 309 | int i; | 
|  | 310 | nfaarc *ar; | 
|  | 311 |  | 
|  | 312 | printf("%c%2d%c", | 
|  | 313 | istate == nf->nf_start ? '*' : ' ', | 
|  | 314 | istate, | 
|  | 315 | istate == nf->nf_finish ? '.' : ' '); | 
|  | 316 | st = &nf->nf_state[istate]; | 
|  | 317 | ar = st->st_arc; | 
|  | 318 | for (i = 0; i < st->st_narcs; i++) { | 
|  | 319 | if (i > 0) | 
|  | 320 | printf("\n    "); | 
|  | 321 | printf("-> %2d  %s", ar->ar_arrow, | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 322 | PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label])); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 323 | ar++; | 
|  | 324 | } | 
|  | 325 | printf("\n"); | 
|  | 326 | } | 
|  | 327 |  | 
|  | 328 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 329 | dumpnfa(labellist *ll, nfa *nf) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 330 | { | 
|  | 331 | int i; | 
|  | 332 |  | 
|  | 333 | printf("NFA '%s' has %d states; start %d, finish %d\n", | 
|  | 334 | nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish); | 
|  | 335 | for (i = 0; i < nf->nf_nstates; i++) | 
|  | 336 | dumpstate(ll, nf, i); | 
|  | 337 | } | 
|  | 338 |  | 
|  | 339 |  | 
|  | 340 | /* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */ | 
|  | 341 |  | 
| Guido van Rossum | 588633d | 1994-12-30 15:46:02 +0000 | [diff] [blame] | 342 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 343 | addclosure(bitset ss, nfa *nf, int istate) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 344 | { | 
|  | 345 | if (addbit(ss, istate)) { | 
|  | 346 | nfastate *st = &nf->nf_state[istate]; | 
|  | 347 | nfaarc *ar = st->st_arc; | 
|  | 348 | int i; | 
|  | 349 |  | 
|  | 350 | for (i = st->st_narcs; --i >= 0; ) { | 
|  | 351 | if (ar->ar_label == EMPTY) | 
|  | 352 | addclosure(ss, nf, ar->ar_arrow); | 
|  | 353 | ar++; | 
|  | 354 | } | 
|  | 355 | } | 
|  | 356 | } | 
|  | 357 |  | 
|  | 358 | typedef struct _ss_arc { | 
|  | 359 | bitset	sa_bitset; | 
|  | 360 | int	sa_arrow; | 
|  | 361 | int	sa_label; | 
|  | 362 | } ss_arc; | 
|  | 363 |  | 
|  | 364 | typedef struct _ss_state { | 
|  | 365 | bitset	ss_ss; | 
|  | 366 | int	ss_narcs; | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 367 | struct _ss_arc	*ss_arc; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 368 | int	ss_deleted; | 
|  | 369 | int	ss_finish; | 
|  | 370 | int	ss_rename; | 
|  | 371 | } ss_state; | 
|  | 372 |  | 
|  | 373 | typedef struct _ss_dfa { | 
|  | 374 | int	sd_nstates; | 
|  | 375 | ss_state *sd_state; | 
|  | 376 | } ss_dfa; | 
|  | 377 |  | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 378 | /* Forward */ | 
| Tim Peters | dbd9ba6 | 2000-07-09 03:09:57 +0000 | [diff] [blame] | 379 | static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, | 
|  | 380 | labellist *ll, char *msg); | 
|  | 381 | static void simplify(int xx_nstates, ss_state *xx_state); | 
|  | 382 | static void convert(dfa *d, int xx_nstates, ss_state *xx_state); | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 383 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 384 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 385 | makedfa(nfagrammar *gr, nfa *nf, dfa *d) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 386 | { | 
|  | 387 | int nbits = nf->nf_nstates; | 
|  | 388 | bitset ss; | 
|  | 389 | int xx_nstates; | 
|  | 390 | ss_state *xx_state, *yy; | 
|  | 391 | ss_arc *zz; | 
|  | 392 | int istate, jstate, iarc, jarc, ibit; | 
|  | 393 | nfastate *st; | 
|  | 394 | nfaarc *ar; | 
|  | 395 |  | 
|  | 396 | ss = newbitset(nbits); | 
|  | 397 | addclosure(ss, nf, nf->nf_start); | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 398 | xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 399 | if (xx_state == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 400 | Py_FatalError("no mem for xx_state in makedfa"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 401 | xx_nstates = 1; | 
|  | 402 | yy = &xx_state[0]; | 
|  | 403 | yy->ss_ss = ss; | 
|  | 404 | yy->ss_narcs = 0; | 
|  | 405 | yy->ss_arc = NULL; | 
|  | 406 | yy->ss_deleted = 0; | 
|  | 407 | yy->ss_finish = testbit(ss, nf->nf_finish); | 
|  | 408 | if (yy->ss_finish) | 
|  | 409 | printf("Error: nonterminal '%s' may produce empty.\n", | 
|  | 410 | nf->nf_name); | 
|  | 411 |  | 
|  | 412 | /* This algorithm is from a book written before | 
|  | 413 | the invention of structured programming... */ | 
|  | 414 |  | 
|  | 415 | /* For each unmarked state... */ | 
|  | 416 | for (istate = 0; istate < xx_nstates; ++istate) { | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 417 | size_t size; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 418 | yy = &xx_state[istate]; | 
|  | 419 | ss = yy->ss_ss; | 
|  | 420 | /* For all its states... */ | 
|  | 421 | for (ibit = 0; ibit < nf->nf_nstates; ++ibit) { | 
|  | 422 | if (!testbit(ss, ibit)) | 
|  | 423 | continue; | 
|  | 424 | st = &nf->nf_state[ibit]; | 
|  | 425 | /* For all non-empty arcs from this state... */ | 
|  | 426 | for (iarc = 0; iarc < st->st_narcs; iarc++) { | 
|  | 427 | ar = &st->st_arc[iarc]; | 
|  | 428 | if (ar->ar_label == EMPTY) | 
|  | 429 | continue; | 
|  | 430 | /* Look up in list of arcs from this state */ | 
|  | 431 | for (jarc = 0; jarc < yy->ss_narcs; ++jarc) { | 
|  | 432 | zz = &yy->ss_arc[jarc]; | 
|  | 433 | if (ar->ar_label == zz->sa_label) | 
|  | 434 | goto found; | 
|  | 435 | } | 
|  | 436 | /* Add new arc for this state */ | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 437 | size = sizeof(ss_arc) * (yy->ss_narcs + 1); | 
|  | 438 | yy->ss_arc = (ss_arc *)PyObject_REALLOC( | 
|  | 439 | yy->ss_arc, size); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 440 | if (yy->ss_arc == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 441 | Py_FatalError("out of mem"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 442 | zz = &yy->ss_arc[yy->ss_narcs++]; | 
|  | 443 | zz->sa_label = ar->ar_label; | 
|  | 444 | zz->sa_bitset = newbitset(nbits); | 
|  | 445 | zz->sa_arrow = -1; | 
|  | 446 | found:	; | 
|  | 447 | /* Add destination */ | 
|  | 448 | addclosure(zz->sa_bitset, nf, ar->ar_arrow); | 
|  | 449 | } | 
|  | 450 | } | 
|  | 451 | /* Now look up all the arrow states */ | 
|  | 452 | for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) { | 
|  | 453 | zz = &xx_state[istate].ss_arc[jarc]; | 
|  | 454 | for (jstate = 0; jstate < xx_nstates; jstate++) { | 
|  | 455 | if (samebitset(zz->sa_bitset, | 
|  | 456 | xx_state[jstate].ss_ss, nbits)) { | 
|  | 457 | zz->sa_arrow = jstate; | 
|  | 458 | goto done; | 
|  | 459 | } | 
|  | 460 | } | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 461 | size = sizeof(ss_state) * (xx_nstates + 1); | 
|  | 462 | xx_state = (ss_state *)PyObject_REALLOC(xx_state, | 
|  | 463 | size); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 464 | if (xx_state == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 465 | Py_FatalError("out of mem"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 466 | zz->sa_arrow = xx_nstates; | 
|  | 467 | yy = &xx_state[xx_nstates++]; | 
|  | 468 | yy->ss_ss = zz->sa_bitset; | 
|  | 469 | yy->ss_narcs = 0; | 
|  | 470 | yy->ss_arc = NULL; | 
|  | 471 | yy->ss_deleted = 0; | 
|  | 472 | yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish); | 
|  | 473 | done:	; | 
|  | 474 | } | 
|  | 475 | } | 
|  | 476 |  | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 477 | if (Py_DebugFlag) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 478 | printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, | 
|  | 479 | "before minimizing"); | 
|  | 480 |  | 
|  | 481 | simplify(xx_nstates, xx_state); | 
|  | 482 |  | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 483 | if (Py_DebugFlag) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 484 | printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, | 
|  | 485 | "after minimizing"); | 
|  | 486 |  | 
|  | 487 | convert(d, xx_nstates, xx_state); | 
|  | 488 |  | 
|  | 489 | /* XXX cleanup */ | 
| Guido van Rossum | 8ce8a78 | 2007-11-01 19:42:39 +0000 | [diff] [blame] | 490 | PyObject_FREE(xx_state); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 491 | } | 
|  | 492 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 493 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 494 | printssdfa(int xx_nstates, ss_state *xx_state, int nbits, | 
|  | 495 | labellist *ll, char *msg) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 496 | { | 
|  | 497 | int i, ibit, iarc; | 
|  | 498 | ss_state *yy; | 
|  | 499 | ss_arc *zz; | 
|  | 500 |  | 
|  | 501 | printf("Subset DFA %s\n", msg); | 
|  | 502 | for (i = 0; i < xx_nstates; i++) { | 
|  | 503 | yy = &xx_state[i]; | 
|  | 504 | if (yy->ss_deleted) | 
|  | 505 | continue; | 
|  | 506 | printf(" Subset %d", i); | 
|  | 507 | if (yy->ss_finish) | 
|  | 508 | printf(" (finish)"); | 
|  | 509 | printf(" { "); | 
|  | 510 | for (ibit = 0; ibit < nbits; ibit++) { | 
|  | 511 | if (testbit(yy->ss_ss, ibit)) | 
|  | 512 | printf("%d ", ibit); | 
|  | 513 | } | 
|  | 514 | printf("}\n"); | 
|  | 515 | for (iarc = 0; iarc < yy->ss_narcs; iarc++) { | 
|  | 516 | zz = &yy->ss_arc[iarc]; | 
|  | 517 | printf("  Arc to state %d, label %s\n", | 
|  | 518 | zz->sa_arrow, | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 519 | PyGrammar_LabelRepr( | 
|  | 520 | &ll->ll_label[zz->sa_label])); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 521 | } | 
|  | 522 | } | 
|  | 523 | } | 
|  | 524 |  | 
|  | 525 |  | 
|  | 526 | /* PART THREE -- SIMPLIFY DFA */ | 
|  | 527 |  | 
|  | 528 | /* Simplify the DFA by repeatedly eliminating states that are | 
|  | 529 | equivalent to another oner.  This is NOT Algorithm 3.3 from | 
|  | 530 | [Aho&Ullman 77].  It does not always finds the minimal DFA, | 
|  | 531 | but it does usually make a much smaller one...  (For an example | 
| Thomas Wouters | 7e47402 | 2000-07-16 12:04:32 +0000 | [diff] [blame] | 532 | of sub-optimal behavior, try S: x a b+ | y a b+.) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 533 | */ | 
|  | 534 |  | 
|  | 535 | static int | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 536 | samestate(ss_state *s1, ss_state *s2) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 537 | { | 
|  | 538 | int i; | 
|  | 539 |  | 
|  | 540 | if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish) | 
|  | 541 | return 0; | 
|  | 542 | for (i = 0; i < s1->ss_narcs; i++) { | 
|  | 543 | if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow || | 
|  | 544 | s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label) | 
|  | 545 | return 0; | 
|  | 546 | } | 
|  | 547 | return 1; | 
|  | 548 | } | 
|  | 549 |  | 
|  | 550 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 551 | renamestates(int xx_nstates, ss_state *xx_state, int from, int to) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 552 | { | 
|  | 553 | int i, j; | 
|  | 554 |  | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 555 | if (Py_DebugFlag) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 556 | printf("Rename state %d to %d.\n", from, to); | 
|  | 557 | for (i = 0; i < xx_nstates; i++) { | 
|  | 558 | if (xx_state[i].ss_deleted) | 
|  | 559 | continue; | 
|  | 560 | for (j = 0; j < xx_state[i].ss_narcs; j++) { | 
|  | 561 | if (xx_state[i].ss_arc[j].sa_arrow == from) | 
|  | 562 | xx_state[i].ss_arc[j].sa_arrow = to; | 
|  | 563 | } | 
|  | 564 | } | 
|  | 565 | } | 
|  | 566 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 567 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 568 | simplify(int xx_nstates, ss_state *xx_state) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 569 | { | 
|  | 570 | int changes; | 
| Guido van Rossum | 588633d | 1994-12-30 15:46:02 +0000 | [diff] [blame] | 571 | int i, j; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 572 |  | 
|  | 573 | do { | 
|  | 574 | changes = 0; | 
|  | 575 | for (i = 1; i < xx_nstates; i++) { | 
|  | 576 | if (xx_state[i].ss_deleted) | 
|  | 577 | continue; | 
|  | 578 | for (j = 0; j < i; j++) { | 
|  | 579 | if (xx_state[j].ss_deleted) | 
|  | 580 | continue; | 
|  | 581 | if (samestate(&xx_state[i], &xx_state[j])) { | 
|  | 582 | xx_state[i].ss_deleted++; | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 583 | renamestates(xx_nstates, xx_state, | 
|  | 584 | i, j); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 585 | changes++; | 
|  | 586 | break; | 
|  | 587 | } | 
|  | 588 | } | 
|  | 589 | } | 
|  | 590 | } while (changes); | 
|  | 591 | } | 
|  | 592 |  | 
|  | 593 |  | 
|  | 594 | /* PART FOUR -- GENERATE PARSING TABLES */ | 
|  | 595 |  | 
|  | 596 | /* Convert the DFA into a grammar that can be used by our parser */ | 
|  | 597 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 598 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 599 | convert(dfa *d, int xx_nstates, ss_state *xx_state) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 600 | { | 
|  | 601 | int i, j; | 
|  | 602 | ss_state *yy; | 
|  | 603 | ss_arc *zz; | 
|  | 604 |  | 
|  | 605 | for (i = 0; i < xx_nstates; i++) { | 
|  | 606 | yy = &xx_state[i]; | 
|  | 607 | if (yy->ss_deleted) | 
|  | 608 | continue; | 
|  | 609 | yy->ss_rename = addstate(d); | 
|  | 610 | } | 
|  | 611 |  | 
|  | 612 | for (i = 0; i < xx_nstates; i++) { | 
|  | 613 | yy = &xx_state[i]; | 
|  | 614 | if (yy->ss_deleted) | 
|  | 615 | continue; | 
|  | 616 | for (j = 0; j < yy->ss_narcs; j++) { | 
|  | 617 | zz = &yy->ss_arc[j]; | 
|  | 618 | addarc(d, yy->ss_rename, | 
|  | 619 | xx_state[zz->sa_arrow].ss_rename, | 
|  | 620 | zz->sa_label); | 
|  | 621 | } | 
|  | 622 | if (yy->ss_finish) | 
|  | 623 | addarc(d, yy->ss_rename, yy->ss_rename, 0); | 
|  | 624 | } | 
|  | 625 |  | 
|  | 626 | d->d_initial = 0; | 
|  | 627 | } | 
|  | 628 |  | 
|  | 629 |  | 
|  | 630 | /* PART FIVE -- GLUE IT ALL TOGETHER */ | 
|  | 631 |  | 
|  | 632 | static grammar * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 633 | maketables(nfagrammar *gr) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 634 | { | 
|  | 635 | int i; | 
|  | 636 | nfa *nf; | 
|  | 637 | dfa *d; | 
|  | 638 | grammar *g; | 
|  | 639 |  | 
|  | 640 | if (gr->gr_nnfas == 0) | 
|  | 641 | return NULL; | 
|  | 642 | g = newgrammar(gr->gr_nfa[0]->nf_type); | 
|  | 643 | /* XXX first rule must be start rule */ | 
|  | 644 | g->g_ll = gr->gr_ll; | 
|  | 645 |  | 
|  | 646 | for (i = 0; i < gr->gr_nnfas; i++) { | 
|  | 647 | nf = gr->gr_nfa[i]; | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 648 | if (Py_DebugFlag) { | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 649 | printf("Dump of NFA for '%s' ...\n", nf->nf_name); | 
|  | 650 | dumpnfa(&gr->gr_ll, nf); | 
| Guido van Rossum | 25dfe2c | 2001-09-11 16:43:16 +0000 | [diff] [blame] | 651 | printf("Making DFA for '%s' ...\n", nf->nf_name); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 652 | } | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 653 | d = adddfa(g, nf->nf_type, nf->nf_name); | 
|  | 654 | makedfa(gr, gr->gr_nfa[i], d); | 
|  | 655 | } | 
|  | 656 |  | 
|  | 657 | return g; | 
|  | 658 | } | 
|  | 659 |  | 
|  | 660 | grammar * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 661 | pgen(node *n) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 662 | { | 
|  | 663 | nfagrammar *gr; | 
|  | 664 | grammar *g; | 
|  | 665 |  | 
|  | 666 | gr = metacompile(n); | 
|  | 667 | g = maketables(gr); | 
|  | 668 | translatelabels(g); | 
|  | 669 | addfirstsets(g); | 
| Christian Heimes | 679db4a | 2008-01-18 09:56:22 +0000 | [diff] [blame] | 670 | PyObject_FREE(gr); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 671 | return g; | 
|  | 672 | } | 
|  | 673 |  | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 674 | grammar * | 
|  | 675 | Py_pgen(node *n) | 
|  | 676 | { | 
|  | 677 | return pgen(n); | 
|  | 678 | } | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 679 |  | 
|  | 680 | /* | 
|  | 681 |  | 
|  | 682 | Description | 
|  | 683 | ----------- | 
|  | 684 |  | 
|  | 685 | Input is a grammar in extended BNF (using * for repetition, + for | 
|  | 686 | at-least-once repetition, [] for optional parts, | for alternatives and | 
|  | 687 | () for grouping).  This has already been parsed and turned into a parse | 
|  | 688 | tree. | 
|  | 689 |  | 
|  | 690 | Each rule is considered as a regular expression in its own right. | 
|  | 691 | It is turned into a Non-deterministic Finite Automaton (NFA), which | 
|  | 692 | is then turned into a Deterministic Finite Automaton (DFA), which is then | 
|  | 693 | optimized to reduce the number of states.  See [Aho&Ullman 77] chapter 3, | 
|  | 694 | or similar compiler books (this technique is more often used for lexical | 
|  | 695 | analyzers). | 
|  | 696 |  | 
|  | 697 | The DFA's are used by the parser as parsing tables in a special way | 
|  | 698 | that's probably unique.  Before they are usable, the FIRST sets of all | 
|  | 699 | non-terminals are computed. | 
|  | 700 |  | 
|  | 701 | Reference | 
|  | 702 | --------- | 
|  | 703 |  | 
|  | 704 | [Aho&Ullman 77] | 
|  | 705 | Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 | 
|  | 706 | (first edition) | 
|  | 707 |  | 
|  | 708 | */ |