| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 1 | /* Parser generator */ | 
 | 2 |  | 
 | 3 | /* For a description, see the comments at end of this file */ | 
 | 4 |  | 
| Tim Peters | 1ca1296 | 2001-12-04 03:18:48 +0000 | [diff] [blame] | 5 | #include "Python.h" | 
| Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 6 | #include "pgenheaders.h" | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 7 | #include "token.h" | 
 | 8 | #include "node.h" | 
 | 9 | #include "grammar.h" | 
 | 10 | #include "metagrammar.h" | 
 | 11 | #include "pgen.h" | 
 | 12 |  | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 13 | extern int Py_DebugFlag; | 
| Neal Norwitz | 5352d8c | 2002-05-31 13:11:40 +0000 | [diff] [blame] | 14 | extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */ | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 15 |  | 
 | 16 |  | 
 | 17 | /* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */ | 
 | 18 |  | 
 | 19 | typedef struct _nfaarc { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 20 |     int         ar_label; | 
 | 21 |     int         ar_arrow; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 22 | } nfaarc; | 
 | 23 |  | 
 | 24 | typedef struct _nfastate { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 25 |     int         st_narcs; | 
 | 26 |     nfaarc      *st_arc; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 27 | } nfastate; | 
 | 28 |  | 
 | 29 | typedef struct _nfa { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 30 |     int                 nf_type; | 
 | 31 |     char                *nf_name; | 
 | 32 |     int                 nf_nstates; | 
 | 33 |     nfastate            *nf_state; | 
 | 34 |     int                 nf_start, nf_finish; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 35 | } nfa; | 
 | 36 |  | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 37 | /* Forward */ | 
| Tim Peters | dbd9ba6 | 2000-07-09 03:09:57 +0000 | [diff] [blame] | 38 | static void compile_rhs(labellist *ll, | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 39 |                         nfa *nf, node *n, int *pa, int *pb); | 
| Tim Peters | dbd9ba6 | 2000-07-09 03:09:57 +0000 | [diff] [blame] | 40 | static void compile_alt(labellist *ll, | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 41 |                         nfa *nf, node *n, int *pa, int *pb); | 
| Tim Peters | dbd9ba6 | 2000-07-09 03:09:57 +0000 | [diff] [blame] | 42 | static void compile_item(labellist *ll, | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 43 |                          nfa *nf, node *n, int *pa, int *pb); | 
| Tim Peters | dbd9ba6 | 2000-07-09 03:09:57 +0000 | [diff] [blame] | 44 | static void compile_atom(labellist *ll, | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 45 |                          nfa *nf, node *n, int *pa, int *pb); | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 46 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 47 | static int | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 48 | addnfastate(nfa *nf) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 49 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 50 |     nfastate *st; | 
 | 51 |  | 
 | 52 |     nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, | 
 | 53 |                                 sizeof(nfastate) * (nf->nf_nstates + 1)); | 
 | 54 |     if (nf->nf_state == NULL) | 
 | 55 |         Py_FatalError("out of mem"); | 
 | 56 |     st = &nf->nf_state[nf->nf_nstates++]; | 
 | 57 |     st->st_narcs = 0; | 
 | 58 |     st->st_arc = NULL; | 
 | 59 |     return st - nf->nf_state; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 60 | } | 
 | 61 |  | 
 | 62 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 63 | addnfaarc(nfa *nf, int from, int to, int lbl) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 64 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 65 |     nfastate *st; | 
 | 66 |     nfaarc *ar; | 
 | 67 |  | 
 | 68 |     st = &nf->nf_state[from]; | 
 | 69 |     st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc, | 
 | 70 |                                   sizeof(nfaarc) * (st->st_narcs + 1)); | 
 | 71 |     if (st->st_arc == NULL) | 
 | 72 |         Py_FatalError("out of mem"); | 
 | 73 |     ar = &st->st_arc[st->st_narcs++]; | 
 | 74 |     ar->ar_label = lbl; | 
 | 75 |     ar->ar_arrow = to; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 76 | } | 
 | 77 |  | 
 | 78 | static nfa * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 79 | newnfa(char *name) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 80 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 81 |     nfa *nf; | 
 | 82 |     static int type = NT_OFFSET; /* All types will be disjunct */ | 
 | 83 |  | 
 | 84 |     nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); | 
 | 85 |     if (nf == NULL) | 
 | 86 |         Py_FatalError("no mem for new nfa"); | 
 | 87 |     nf->nf_type = type++; | 
 | 88 |     nf->nf_name = name; /* XXX strdup(name) ??? */ | 
 | 89 |     nf->nf_nstates = 0; | 
 | 90 |     nf->nf_state = NULL; | 
 | 91 |     nf->nf_start = nf->nf_finish = -1; | 
 | 92 |     return nf; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 93 | } | 
 | 94 |  | 
 | 95 | typedef struct _nfagrammar { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 96 |     int                 gr_nnfas; | 
 | 97 |     nfa                 **gr_nfa; | 
 | 98 |     labellist           gr_ll; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 99 | } nfagrammar; | 
 | 100 |  | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 101 | /* Forward */ | 
| Tim Peters | dbd9ba6 | 2000-07-09 03:09:57 +0000 | [diff] [blame] | 102 | static void compile_rule(nfagrammar *gr, node *n); | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 103 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 104 | static nfagrammar * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 105 | newnfagrammar(void) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 106 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 107 |     nfagrammar *gr; | 
 | 108 |  | 
 | 109 |     gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); | 
 | 110 |     if (gr == NULL) | 
 | 111 |         Py_FatalError("no mem for new nfa grammar"); | 
 | 112 |     gr->gr_nnfas = 0; | 
 | 113 |     gr->gr_nfa = NULL; | 
 | 114 |     gr->gr_ll.ll_nlabels = 0; | 
 | 115 |     gr->gr_ll.ll_label = NULL; | 
 | 116 |     addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); | 
 | 117 |     return gr; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 118 | } | 
 | 119 |  | 
| Benjamin Peterson | 9ac11a7 | 2016-09-18 18:00:25 -0700 | [diff] [blame] | 120 | static void | 
 | 121 | freenfagrammar(nfagrammar *gr) | 
 | 122 | { | 
 | 123 |     for (int i = 0; i < gr->gr_nnfas; i++) { | 
 | 124 |         PyObject_FREE(gr->gr_nfa[i]->nf_state); | 
 | 125 |     } | 
 | 126 |     PyObject_FREE(gr->gr_nfa); | 
 | 127 |     PyObject_FREE(gr); | 
 | 128 | } | 
 | 129 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 130 | static nfa * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 131 | addnfa(nfagrammar *gr, char *name) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 132 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 133 |     nfa *nf; | 
 | 134 |  | 
 | 135 |     nf = newnfa(name); | 
 | 136 |     gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, | 
 | 137 |                                   sizeof(nfa*) * (gr->gr_nnfas + 1)); | 
 | 138 |     if (gr->gr_nfa == NULL) | 
 | 139 |         Py_FatalError("out of mem"); | 
 | 140 |     gr->gr_nfa[gr->gr_nnfas++] = nf; | 
 | 141 |     addlabel(&gr->gr_ll, NAME, nf->nf_name); | 
 | 142 |     return nf; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 143 | } | 
 | 144 |  | 
| Guido van Rossum | 408027e | 1996-12-30 16:17:54 +0000 | [diff] [blame] | 145 | #ifdef Py_DEBUG | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 146 |  | 
| Serhiy Storchaka | 2d06e84 | 2015-12-25 19:53:18 +0200 | [diff] [blame] | 147 | static const char REQNFMT[] = "metacompile: less than %d children\n"; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 148 |  | 
| Serhiy Storchaka | a8cd4d4 | 2015-04-03 15:24:33 +0300 | [diff] [blame] | 149 | #define REQN(i, count) do { \ | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 150 |     if (i < count) { \ | 
 | 151 |         fprintf(stderr, REQNFMT, count); \ | 
 | 152 |         Py_FatalError("REQN"); \ | 
| Serhiy Storchaka | a8cd4d4 | 2015-04-03 15:24:33 +0300 | [diff] [blame] | 153 |     } \ | 
 | 154 | } while (0) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 155 |  | 
 | 156 | #else | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 157 | #define REQN(i, count)  /* empty */ | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 158 | #endif | 
 | 159 |  | 
 | 160 | static nfagrammar * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 161 | metacompile(node *n) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 162 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 163 |     nfagrammar *gr; | 
 | 164 |     int i; | 
| Guido van Rossum | 25dfe2c | 2001-09-11 16:43:16 +0000 | [diff] [blame] | 165 |  | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 166 |     if (Py_DebugFlag) | 
 | 167 |         printf("Compiling (meta-) parse tree into NFA grammar\n"); | 
 | 168 |     gr = newnfagrammar(); | 
 | 169 |     REQ(n, MSTART); | 
 | 170 |     i = n->n_nchildren - 1; /* Last child is ENDMARKER */ | 
 | 171 |     n = n->n_child; | 
 | 172 |     for (; --i >= 0; n++) { | 
 | 173 |         if (n->n_type != NEWLINE) | 
 | 174 |             compile_rule(gr, n); | 
 | 175 |     } | 
 | 176 |     return gr; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 177 | } | 
 | 178 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 179 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 180 | compile_rule(nfagrammar *gr, node *n) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 181 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 182 |     nfa *nf; | 
 | 183 |  | 
 | 184 |     REQ(n, RULE); | 
 | 185 |     REQN(n->n_nchildren, 4); | 
 | 186 |     n = n->n_child; | 
 | 187 |     REQ(n, NAME); | 
 | 188 |     nf = addnfa(gr, n->n_str); | 
 | 189 |     n++; | 
 | 190 |     REQ(n, COLON); | 
 | 191 |     n++; | 
 | 192 |     REQ(n, RHS); | 
 | 193 |     compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); | 
 | 194 |     n++; | 
 | 195 |     REQ(n, NEWLINE); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 196 | } | 
 | 197 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 198 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 199 | compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 200 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 201 |     int i; | 
 | 202 |     int a, b; | 
 | 203 |  | 
 | 204 |     REQ(n, RHS); | 
 | 205 |     i = n->n_nchildren; | 
 | 206 |     REQN(i, 1); | 
 | 207 |     n = n->n_child; | 
 | 208 |     REQ(n, ALT); | 
 | 209 |     compile_alt(ll, nf, n, pa, pb); | 
 | 210 |     if (--i <= 0) | 
 | 211 |         return; | 
 | 212 |     n++; | 
 | 213 |     a = *pa; | 
 | 214 |     b = *pb; | 
 | 215 |     *pa = addnfastate(nf); | 
 | 216 |     *pb = addnfastate(nf); | 
 | 217 |     addnfaarc(nf, *pa, a, EMPTY); | 
 | 218 |     addnfaarc(nf, b, *pb, EMPTY); | 
 | 219 |     for (; --i >= 0; n++) { | 
 | 220 |         REQ(n, VBAR); | 
 | 221 |         REQN(i, 1); | 
 | 222 |         --i; | 
 | 223 |         n++; | 
 | 224 |         REQ(n, ALT); | 
 | 225 |         compile_alt(ll, nf, n, &a, &b); | 
 | 226 |         addnfaarc(nf, *pa, a, EMPTY); | 
 | 227 |         addnfaarc(nf, b, *pb, EMPTY); | 
 | 228 |     } | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 229 | } | 
 | 230 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 231 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 232 | compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 233 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 234 |     int i; | 
 | 235 |     int a, b; | 
 | 236 |  | 
 | 237 |     REQ(n, ALT); | 
 | 238 |     i = n->n_nchildren; | 
 | 239 |     REQN(i, 1); | 
 | 240 |     n = n->n_child; | 
 | 241 |     REQ(n, ITEM); | 
 | 242 |     compile_item(ll, nf, n, pa, pb); | 
 | 243 |     --i; | 
 | 244 |     n++; | 
 | 245 |     for (; --i >= 0; n++) { | 
 | 246 |         REQ(n, ITEM); | 
 | 247 |         compile_item(ll, nf, n, &a, &b); | 
 | 248 |         addnfaarc(nf, *pb, a, EMPTY); | 
 | 249 |         *pb = b; | 
 | 250 |     } | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 251 | } | 
 | 252 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 253 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 254 | compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 255 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 256 |     int i; | 
 | 257 |     int a, b; | 
 | 258 |  | 
 | 259 |     REQ(n, ITEM); | 
 | 260 |     i = n->n_nchildren; | 
 | 261 |     REQN(i, 1); | 
 | 262 |     n = n->n_child; | 
 | 263 |     if (n->n_type == LSQB) { | 
 | 264 |         REQN(i, 3); | 
 | 265 |         n++; | 
 | 266 |         REQ(n, RHS); | 
 | 267 |         *pa = addnfastate(nf); | 
 | 268 |         *pb = addnfastate(nf); | 
 | 269 |         addnfaarc(nf, *pa, *pb, EMPTY); | 
 | 270 |         compile_rhs(ll, nf, n, &a, &b); | 
 | 271 |         addnfaarc(nf, *pa, a, EMPTY); | 
 | 272 |         addnfaarc(nf, b, *pb, EMPTY); | 
 | 273 |         REQN(i, 1); | 
 | 274 |         n++; | 
 | 275 |         REQ(n, RSQB); | 
 | 276 |     } | 
 | 277 |     else { | 
 | 278 |         compile_atom(ll, nf, n, pa, pb); | 
 | 279 |         if (--i <= 0) | 
 | 280 |             return; | 
 | 281 |         n++; | 
 | 282 |         addnfaarc(nf, *pb, *pa, EMPTY); | 
 | 283 |         if (n->n_type == STAR) | 
 | 284 |             *pb = *pa; | 
 | 285 |         else | 
 | 286 |             REQ(n, PLUS); | 
 | 287 |     } | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 288 | } | 
 | 289 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 290 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 291 | compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 292 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 293 |     int i; | 
 | 294 |  | 
 | 295 |     REQ(n, ATOM); | 
 | 296 |     i = n->n_nchildren; | 
| Christian Heimes | 5e4d372 | 2013-07-31 23:47:56 +0200 | [diff] [blame] | 297 |     (void)i; /* Don't warn about set but unused */ | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 298 |     REQN(i, 1); | 
 | 299 |     n = n->n_child; | 
 | 300 |     if (n->n_type == LPAR) { | 
 | 301 |         REQN(i, 3); | 
 | 302 |         n++; | 
 | 303 |         REQ(n, RHS); | 
 | 304 |         compile_rhs(ll, nf, n, pa, pb); | 
 | 305 |         n++; | 
 | 306 |         REQ(n, RPAR); | 
 | 307 |     } | 
 | 308 |     else if (n->n_type == NAME || n->n_type == STRING) { | 
 | 309 |         *pa = addnfastate(nf); | 
 | 310 |         *pb = addnfastate(nf); | 
 | 311 |         addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); | 
 | 312 |     } | 
 | 313 |     else | 
 | 314 |         REQ(n, NAME); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 315 | } | 
 | 316 |  | 
 | 317 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 318 | dumpstate(labellist *ll, nfa *nf, int istate) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 319 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 320 |     nfastate *st; | 
 | 321 |     int i; | 
 | 322 |     nfaarc *ar; | 
 | 323 |  | 
 | 324 |     printf("%c%2d%c", | 
 | 325 |         istate == nf->nf_start ? '*' : ' ', | 
 | 326 |         istate, | 
 | 327 |         istate == nf->nf_finish ? '.' : ' '); | 
 | 328 |     st = &nf->nf_state[istate]; | 
 | 329 |     ar = st->st_arc; | 
 | 330 |     for (i = 0; i < st->st_narcs; i++) { | 
 | 331 |         if (i > 0) | 
 | 332 |             printf("\n    "); | 
 | 333 |         printf("-> %2d  %s", ar->ar_arrow, | 
 | 334 |             PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label])); | 
 | 335 |         ar++; | 
 | 336 |     } | 
 | 337 |     printf("\n"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 338 | } | 
 | 339 |  | 
 | 340 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 341 | dumpnfa(labellist *ll, nfa *nf) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 342 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 343 |     int i; | 
 | 344 |  | 
 | 345 |     printf("NFA '%s' has %d states; start %d, finish %d\n", | 
 | 346 |         nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish); | 
 | 347 |     for (i = 0; i < nf->nf_nstates; i++) | 
 | 348 |         dumpstate(ll, nf, i); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 349 | } | 
 | 350 |  | 
 | 351 |  | 
 | 352 | /* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */ | 
 | 353 |  | 
| Guido van Rossum | 588633d | 1994-12-30 15:46:02 +0000 | [diff] [blame] | 354 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 355 | addclosure(bitset ss, nfa *nf, int istate) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 356 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 357 |     if (addbit(ss, istate)) { | 
 | 358 |         nfastate *st = &nf->nf_state[istate]; | 
 | 359 |         nfaarc *ar = st->st_arc; | 
 | 360 |         int i; | 
 | 361 |  | 
 | 362 |         for (i = st->st_narcs; --i >= 0; ) { | 
 | 363 |             if (ar->ar_label == EMPTY) | 
 | 364 |                 addclosure(ss, nf, ar->ar_arrow); | 
 | 365 |             ar++; | 
 | 366 |         } | 
 | 367 |     } | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 368 | } | 
 | 369 |  | 
 | 370 | typedef struct _ss_arc { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 371 |     bitset      sa_bitset; | 
 | 372 |     int         sa_arrow; | 
 | 373 |     int         sa_label; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 374 | } ss_arc; | 
 | 375 |  | 
 | 376 | typedef struct _ss_state { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 377 |     bitset      ss_ss; | 
 | 378 |     int         ss_narcs; | 
 | 379 |     struct _ss_arc      *ss_arc; | 
 | 380 |     int         ss_deleted; | 
 | 381 |     int         ss_finish; | 
 | 382 |     int         ss_rename; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 383 | } ss_state; | 
 | 384 |  | 
 | 385 | typedef struct _ss_dfa { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 386 |     int         sd_nstates; | 
 | 387 |     ss_state *sd_state; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 388 | } ss_dfa; | 
 | 389 |  | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 390 | /* Forward */ | 
| Tim Peters | dbd9ba6 | 2000-07-09 03:09:57 +0000 | [diff] [blame] | 391 | static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, | 
| Serhiy Storchaka | ef1585e | 2015-12-25 20:01:53 +0200 | [diff] [blame] | 392 |                        labellist *ll, const char *msg); | 
| Tim Peters | dbd9ba6 | 2000-07-09 03:09:57 +0000 | [diff] [blame] | 393 | static void simplify(int xx_nstates, ss_state *xx_state); | 
 | 394 | static void convert(dfa *d, int xx_nstates, ss_state *xx_state); | 
| Guido van Rossum | 71f477c | 1991-04-03 19:09:02 +0000 | [diff] [blame] | 395 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 396 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 397 | makedfa(nfagrammar *gr, nfa *nf, dfa *d) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 398 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 399 |     int nbits = nf->nf_nstates; | 
 | 400 |     bitset ss; | 
 | 401 |     int xx_nstates; | 
 | 402 |     ss_state *xx_state, *yy; | 
 | 403 |     ss_arc *zz; | 
 | 404 |     int istate, jstate, iarc, jarc, ibit; | 
 | 405 |     nfastate *st; | 
 | 406 |     nfaarc *ar; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 407 |  | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 408 |     ss = newbitset(nbits); | 
 | 409 |     addclosure(ss, nf, nf->nf_start); | 
 | 410 |     xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state)); | 
 | 411 |     if (xx_state == NULL) | 
 | 412 |         Py_FatalError("no mem for xx_state in makedfa"); | 
 | 413 |     xx_nstates = 1; | 
 | 414 |     yy = &xx_state[0]; | 
 | 415 |     yy->ss_ss = ss; | 
 | 416 |     yy->ss_narcs = 0; | 
 | 417 |     yy->ss_arc = NULL; | 
 | 418 |     yy->ss_deleted = 0; | 
 | 419 |     yy->ss_finish = testbit(ss, nf->nf_finish); | 
 | 420 |     if (yy->ss_finish) | 
 | 421 |         printf("Error: nonterminal '%s' may produce empty.\n", | 
 | 422 |             nf->nf_name); | 
 | 423 |  | 
 | 424 |     /* This algorithm is from a book written before | 
 | 425 |        the invention of structured programming... */ | 
 | 426 |  | 
 | 427 |     /* For each unmarked state... */ | 
 | 428 |     for (istate = 0; istate < xx_nstates; ++istate) { | 
 | 429 |         size_t size; | 
 | 430 |         yy = &xx_state[istate]; | 
 | 431 |         ss = yy->ss_ss; | 
 | 432 |         /* For all its states... */ | 
 | 433 |         for (ibit = 0; ibit < nf->nf_nstates; ++ibit) { | 
 | 434 |             if (!testbit(ss, ibit)) | 
 | 435 |                 continue; | 
 | 436 |             st = &nf->nf_state[ibit]; | 
 | 437 |             /* For all non-empty arcs from this state... */ | 
 | 438 |             for (iarc = 0; iarc < st->st_narcs; iarc++) { | 
 | 439 |                 ar = &st->st_arc[iarc]; | 
 | 440 |                 if (ar->ar_label == EMPTY) | 
 | 441 |                     continue; | 
 | 442 |                 /* Look up in list of arcs from this state */ | 
 | 443 |                 for (jarc = 0; jarc < yy->ss_narcs; ++jarc) { | 
 | 444 |                     zz = &yy->ss_arc[jarc]; | 
 | 445 |                     if (ar->ar_label == zz->sa_label) | 
 | 446 |                         goto found; | 
 | 447 |                 } | 
 | 448 |                 /* Add new arc for this state */ | 
 | 449 |                 size = sizeof(ss_arc) * (yy->ss_narcs + 1); | 
 | 450 |                 yy->ss_arc = (ss_arc *)PyObject_REALLOC( | 
 | 451 |                                             yy->ss_arc, size); | 
 | 452 |                 if (yy->ss_arc == NULL) | 
 | 453 |                     Py_FatalError("out of mem"); | 
 | 454 |                 zz = &yy->ss_arc[yy->ss_narcs++]; | 
 | 455 |                 zz->sa_label = ar->ar_label; | 
 | 456 |                 zz->sa_bitset = newbitset(nbits); | 
 | 457 |                 zz->sa_arrow = -1; | 
 | 458 |              found:             ; | 
 | 459 |                 /* Add destination */ | 
 | 460 |                 addclosure(zz->sa_bitset, nf, ar->ar_arrow); | 
 | 461 |             } | 
 | 462 |         } | 
 | 463 |         /* Now look up all the arrow states */ | 
 | 464 |         for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) { | 
 | 465 |             zz = &xx_state[istate].ss_arc[jarc]; | 
 | 466 |             for (jstate = 0; jstate < xx_nstates; jstate++) { | 
 | 467 |                 if (samebitset(zz->sa_bitset, | 
 | 468 |                     xx_state[jstate].ss_ss, nbits)) { | 
 | 469 |                     zz->sa_arrow = jstate; | 
 | 470 |                     goto done; | 
 | 471 |                 } | 
 | 472 |             } | 
 | 473 |             size = sizeof(ss_state) * (xx_nstates + 1); | 
 | 474 |             xx_state = (ss_state *)PyObject_REALLOC(xx_state, | 
 | 475 |                                                         size); | 
 | 476 |             if (xx_state == NULL) | 
 | 477 |                 Py_FatalError("out of mem"); | 
 | 478 |             zz->sa_arrow = xx_nstates; | 
 | 479 |             yy = &xx_state[xx_nstates++]; | 
 | 480 |             yy->ss_ss = zz->sa_bitset; | 
 | 481 |             yy->ss_narcs = 0; | 
 | 482 |             yy->ss_arc = NULL; | 
 | 483 |             yy->ss_deleted = 0; | 
 | 484 |             yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish); | 
 | 485 |          done:          ; | 
 | 486 |         } | 
 | 487 |     } | 
 | 488 |  | 
 | 489 |     if (Py_DebugFlag) | 
 | 490 |         printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, | 
 | 491 |                                         "before minimizing"); | 
 | 492 |  | 
 | 493 |     simplify(xx_nstates, xx_state); | 
 | 494 |  | 
 | 495 |     if (Py_DebugFlag) | 
 | 496 |         printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, | 
 | 497 |                                         "after minimizing"); | 
 | 498 |  | 
 | 499 |     convert(d, xx_nstates, xx_state); | 
 | 500 |  | 
| Benjamin Peterson | 9ac11a7 | 2016-09-18 18:00:25 -0700 | [diff] [blame] | 501 |     for (int i = 0; i < xx_nstates; i++) { | 
 | 502 |         for (int j = 0; j < xx_state[i].ss_narcs; j++) | 
 | 503 |             delbitset(xx_state[i].ss_arc[j].sa_bitset); | 
 | 504 |         PyObject_FREE(xx_state[i].ss_arc); | 
 | 505 |     } | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 506 |     PyObject_FREE(xx_state); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 507 | } | 
 | 508 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 509 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 510 | printssdfa(int xx_nstates, ss_state *xx_state, int nbits, | 
| Serhiy Storchaka | ef1585e | 2015-12-25 20:01:53 +0200 | [diff] [blame] | 511 |            labellist *ll, const char *msg) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 512 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 513 |     int i, ibit, iarc; | 
 | 514 |     ss_state *yy; | 
 | 515 |     ss_arc *zz; | 
 | 516 |  | 
 | 517 |     printf("Subset DFA %s\n", msg); | 
 | 518 |     for (i = 0; i < xx_nstates; i++) { | 
 | 519 |         yy = &xx_state[i]; | 
 | 520 |         if (yy->ss_deleted) | 
 | 521 |             continue; | 
 | 522 |         printf(" Subset %d", i); | 
 | 523 |         if (yy->ss_finish) | 
 | 524 |             printf(" (finish)"); | 
 | 525 |         printf(" { "); | 
 | 526 |         for (ibit = 0; ibit < nbits; ibit++) { | 
 | 527 |             if (testbit(yy->ss_ss, ibit)) | 
 | 528 |                 printf("%d ", ibit); | 
 | 529 |         } | 
 | 530 |         printf("}\n"); | 
 | 531 |         for (iarc = 0; iarc < yy->ss_narcs; iarc++) { | 
 | 532 |             zz = &yy->ss_arc[iarc]; | 
 | 533 |             printf("  Arc to state %d, label %s\n", | 
 | 534 |                 zz->sa_arrow, | 
 | 535 |                 PyGrammar_LabelRepr( | 
 | 536 |                     &ll->ll_label[zz->sa_label])); | 
 | 537 |         } | 
 | 538 |     } | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 539 | } | 
 | 540 |  | 
 | 541 |  | 
 | 542 | /* PART THREE -- SIMPLIFY DFA */ | 
 | 543 |  | 
 | 544 | /* Simplify the DFA by repeatedly eliminating states that are | 
 | 545 |    equivalent to another oner.  This is NOT Algorithm 3.3 from | 
 | 546 |    [Aho&Ullman 77].  It does not always finds the minimal DFA, | 
 | 547 |    but it does usually make a much smaller one...  (For an example | 
| Thomas Wouters | 7e47402 | 2000-07-16 12:04:32 +0000 | [diff] [blame] | 548 |    of sub-optimal behavior, try S: x a b+ | y a b+.) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 549 | */ | 
 | 550 |  | 
 | 551 | static int | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 552 | samestate(ss_state *s1, ss_state *s2) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 553 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 554 |     int i; | 
 | 555 |  | 
 | 556 |     if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish) | 
 | 557 |         return 0; | 
 | 558 |     for (i = 0; i < s1->ss_narcs; i++) { | 
 | 559 |         if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow || | 
 | 560 |             s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label) | 
 | 561 |             return 0; | 
 | 562 |     } | 
 | 563 |     return 1; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 564 | } | 
 | 565 |  | 
 | 566 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 567 | renamestates(int xx_nstates, ss_state *xx_state, int from, int to) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 568 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 569 |     int i, j; | 
 | 570 |  | 
 | 571 |     if (Py_DebugFlag) | 
 | 572 |         printf("Rename state %d to %d.\n", from, to); | 
 | 573 |     for (i = 0; i < xx_nstates; i++) { | 
 | 574 |         if (xx_state[i].ss_deleted) | 
 | 575 |             continue; | 
 | 576 |         for (j = 0; j < xx_state[i].ss_narcs; j++) { | 
 | 577 |             if (xx_state[i].ss_arc[j].sa_arrow == from) | 
 | 578 |                 xx_state[i].ss_arc[j].sa_arrow = to; | 
 | 579 |         } | 
 | 580 |     } | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 581 | } | 
 | 582 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 583 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 584 | simplify(int xx_nstates, ss_state *xx_state) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 585 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 586 |     int changes; | 
 | 587 |     int i, j; | 
 | 588 |  | 
 | 589 |     do { | 
 | 590 |         changes = 0; | 
 | 591 |         for (i = 1; i < xx_nstates; i++) { | 
 | 592 |             if (xx_state[i].ss_deleted) | 
 | 593 |                 continue; | 
 | 594 |             for (j = 0; j < i; j++) { | 
 | 595 |                 if (xx_state[j].ss_deleted) | 
 | 596 |                     continue; | 
 | 597 |                 if (samestate(&xx_state[i], &xx_state[j])) { | 
 | 598 |                     xx_state[i].ss_deleted++; | 
 | 599 |                     renamestates(xx_nstates, xx_state, | 
 | 600 |                                  i, j); | 
 | 601 |                     changes++; | 
 | 602 |                     break; | 
 | 603 |                 } | 
 | 604 |             } | 
 | 605 |         } | 
 | 606 |     } while (changes); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 607 | } | 
 | 608 |  | 
 | 609 |  | 
 | 610 | /* PART FOUR -- GENERATE PARSING TABLES */ | 
 | 611 |  | 
 | 612 | /* Convert the DFA into a grammar that can be used by our parser */ | 
 | 613 |  | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 614 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 615 | convert(dfa *d, int xx_nstates, ss_state *xx_state) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 616 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 617 |     int i, j; | 
 | 618 |     ss_state *yy; | 
 | 619 |     ss_arc *zz; | 
 | 620 |  | 
 | 621 |     for (i = 0; i < xx_nstates; i++) { | 
 | 622 |         yy = &xx_state[i]; | 
 | 623 |         if (yy->ss_deleted) | 
 | 624 |             continue; | 
 | 625 |         yy->ss_rename = addstate(d); | 
 | 626 |     } | 
 | 627 |  | 
 | 628 |     for (i = 0; i < xx_nstates; i++) { | 
 | 629 |         yy = &xx_state[i]; | 
 | 630 |         if (yy->ss_deleted) | 
 | 631 |             continue; | 
 | 632 |         for (j = 0; j < yy->ss_narcs; j++) { | 
 | 633 |             zz = &yy->ss_arc[j]; | 
 | 634 |             addarc(d, yy->ss_rename, | 
 | 635 |                 xx_state[zz->sa_arrow].ss_rename, | 
 | 636 |                 zz->sa_label); | 
 | 637 |         } | 
 | 638 |         if (yy->ss_finish) | 
 | 639 |             addarc(d, yy->ss_rename, yy->ss_rename, 0); | 
 | 640 |     } | 
 | 641 |  | 
 | 642 |     d->d_initial = 0; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 643 | } | 
 | 644 |  | 
 | 645 |  | 
 | 646 | /* PART FIVE -- GLUE IT ALL TOGETHER */ | 
 | 647 |  | 
 | 648 | static grammar * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 649 | maketables(nfagrammar *gr) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 650 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 651 |     int i; | 
 | 652 |     nfa *nf; | 
 | 653 |     dfa *d; | 
 | 654 |     grammar *g; | 
 | 655 |  | 
 | 656 |     if (gr->gr_nnfas == 0) | 
 | 657 |         return NULL; | 
 | 658 |     g = newgrammar(gr->gr_nfa[0]->nf_type); | 
 | 659 |                     /* XXX first rule must be start rule */ | 
 | 660 |     g->g_ll = gr->gr_ll; | 
 | 661 |  | 
 | 662 |     for (i = 0; i < gr->gr_nnfas; i++) { | 
 | 663 |         nf = gr->gr_nfa[i]; | 
 | 664 |         if (Py_DebugFlag) { | 
 | 665 |             printf("Dump of NFA for '%s' ...\n", nf->nf_name); | 
 | 666 |             dumpnfa(&gr->gr_ll, nf); | 
 | 667 |             printf("Making DFA for '%s' ...\n", nf->nf_name); | 
 | 668 |         } | 
 | 669 |         d = adddfa(g, nf->nf_type, nf->nf_name); | 
 | 670 |         makedfa(gr, gr->gr_nfa[i], d); | 
 | 671 |     } | 
 | 672 |  | 
 | 673 |     return g; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 674 | } | 
 | 675 |  | 
 | 676 | grammar * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 677 | pgen(node *n) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 678 | { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 679 |     nfagrammar *gr; | 
 | 680 |     grammar *g; | 
 | 681 |  | 
 | 682 |     gr = metacompile(n); | 
 | 683 |     g = maketables(gr); | 
 | 684 |     translatelabels(g); | 
 | 685 |     addfirstsets(g); | 
| Benjamin Peterson | 9ac11a7 | 2016-09-18 18:00:25 -0700 | [diff] [blame] | 686 |     freenfagrammar(gr); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 687 |     return g; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 688 | } | 
 | 689 |  | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 690 | grammar * | 
 | 691 | Py_pgen(node *n) | 
 | 692 | { | 
 | 693 |   return pgen(n); | 
 | 694 | } | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 695 |  | 
 | 696 | /* | 
 | 697 |  | 
 | 698 | Description | 
 | 699 | ----------- | 
 | 700 |  | 
 | 701 | Input is a grammar in extended BNF (using * for repetition, + for | 
 | 702 | at-least-once repetition, [] for optional parts, | for alternatives and | 
 | 703 | () for grouping).  This has already been parsed and turned into a parse | 
 | 704 | tree. | 
 | 705 |  | 
 | 706 | Each rule is considered as a regular expression in its own right. | 
 | 707 | It is turned into a Non-deterministic Finite Automaton (NFA), which | 
 | 708 | is then turned into a Deterministic Finite Automaton (DFA), which is then | 
 | 709 | optimized to reduce the number of states.  See [Aho&Ullman 77] chapter 3, | 
 | 710 | or similar compiler books (this technique is more often used for lexical | 
 | 711 | analyzers). | 
 | 712 |  | 
 | 713 | The DFA's are used by the parser as parsing tables in a special way | 
 | 714 | that's probably unique.  Before they are usable, the FIRST sets of all | 
 | 715 | non-terminals are computed. | 
 | 716 |  | 
 | 717 | Reference | 
 | 718 | --------- | 
 | 719 |  | 
 | 720 | [Aho&Ullman 77] | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 721 |     Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 | 
 | 722 |     (first edition) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 723 |  | 
 | 724 | */ |