| Guido van Rossum | f70e43a | 1991-02-19 12:39:46 +0000 | [diff] [blame] | 1 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2 | /* Grammar implementation */ | 
 | 3 |  | 
| Tim Peters | 1ca1296 | 2001-12-04 03:18:48 +0000 | [diff] [blame] | 4 | #include "Python.h" | 
| Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 5 | #include "pgenheaders.h" | 
 | 6 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 7 | #include <ctype.h> | 
 | 8 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 9 | #include "token.h" | 
 | 10 | #include "grammar.h" | 
 | 11 |  | 
| Martin v. Löwis | a94568a | 2003-05-10 07:36:56 +0000 | [diff] [blame] | 12 | #ifdef RISCOS | 
 | 13 | #include <unixlib.h> | 
 | 14 | #endif | 
 | 15 |  | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 16 | extern int Py_DebugFlag; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 17 |  | 
 | 18 | grammar * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 19 | newgrammar(int start) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 20 | { | 
 | 21 | 	grammar *g; | 
 | 22 | 	 | 
| Anthony Baxter | 1149002 | 2006-04-11 05:39:14 +0000 | [diff] [blame] | 23 | 	g = (grammar *)PyObject_MALLOC(sizeof(grammar)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 24 | 	if (g == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 25 | 		Py_FatalError("no mem for new grammar"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 26 | 	g->g_ndfas = 0; | 
 | 27 | 	g->g_dfa = NULL; | 
 | 28 | 	g->g_start = start; | 
 | 29 | 	g->g_ll.ll_nlabels = 0; | 
 | 30 | 	g->g_ll.ll_label = NULL; | 
| Guido van Rossum | 588633d | 1994-12-30 15:46:02 +0000 | [diff] [blame] | 31 | 	g->g_accel = 0; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 32 | 	return g; | 
 | 33 | } | 
 | 34 |  | 
 | 35 | dfa * | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 36 | adddfa(grammar *g, int type, char *name) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 37 | { | 
 | 38 | 	dfa *d; | 
 | 39 | 	 | 
| Anthony Baxter | 1149002 | 2006-04-11 05:39:14 +0000 | [diff] [blame] | 40 | 	g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa,  | 
 | 41 |                                             sizeof(dfa) * (g->g_ndfas + 1)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 42 | 	if (g->g_dfa == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 43 | 		Py_FatalError("no mem to resize dfa in adddfa"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 44 | 	d = &g->g_dfa[g->g_ndfas++]; | 
 | 45 | 	d->d_type = type; | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 46 | 	d->d_name = strdup(name); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 47 | 	d->d_nstates = 0; | 
 | 48 | 	d->d_state = NULL; | 
 | 49 | 	d->d_initial = -1; | 
 | 50 | 	d->d_first = NULL; | 
 | 51 | 	return d; /* Only use while fresh! */ | 
 | 52 | } | 
 | 53 |  | 
 | 54 | int | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 55 | addstate(dfa *d) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 56 | { | 
 | 57 | 	state *s; | 
 | 58 | 	 | 
| Anthony Baxter | 1149002 | 2006-04-11 05:39:14 +0000 | [diff] [blame] | 59 | 	d->d_state = (state *)PyObject_REALLOC(d->d_state, | 
| Neal Norwitz | 2c4e4f9 | 2006-04-10 06:42:25 +0000 | [diff] [blame] | 60 | 				      sizeof(state) * (d->d_nstates + 1)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 61 | 	if (d->d_state == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 62 | 		Py_FatalError("no mem to resize state in addstate"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 63 | 	s = &d->d_state[d->d_nstates++]; | 
 | 64 | 	s->s_narcs = 0; | 
 | 65 | 	s->s_arc = NULL; | 
| Guido van Rossum | 588633d | 1994-12-30 15:46:02 +0000 | [diff] [blame] | 66 | 	s->s_lower = 0; | 
 | 67 | 	s->s_upper = 0; | 
 | 68 | 	s->s_accel = NULL; | 
 | 69 | 	s->s_accept = 0; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 70 | 	return s - d->d_state; | 
 | 71 | } | 
 | 72 |  | 
 | 73 | void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 74 | addarc(dfa *d, int from, int to, int lbl) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 75 | { | 
 | 76 | 	state *s; | 
 | 77 | 	arc *a; | 
 | 78 | 	 | 
 | 79 | 	assert(0 <= from && from < d->d_nstates); | 
 | 80 | 	assert(0 <= to && to < d->d_nstates); | 
 | 81 | 	 | 
 | 82 | 	s = &d->d_state[from]; | 
| Anthony Baxter | 1149002 | 2006-04-11 05:39:14 +0000 | [diff] [blame] | 83 | 	s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 84 | 	if (s->s_arc == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 85 | 		Py_FatalError("no mem to resize arc list in addarc"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 86 | 	a = &s->s_arc[s->s_narcs++]; | 
 | 87 | 	a->a_lbl = lbl; | 
 | 88 | 	a->a_arrow = to; | 
 | 89 | } | 
 | 90 |  | 
 | 91 | int | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 92 | addlabel(labellist *ll, int type, char *str) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 93 | { | 
 | 94 | 	int i; | 
 | 95 | 	label *lb; | 
 | 96 | 	 | 
 | 97 | 	for (i = 0; i < ll->ll_nlabels; i++) { | 
 | 98 | 		if (ll->ll_label[i].lb_type == type && | 
 | 99 | 			strcmp(ll->ll_label[i].lb_str, str) == 0) | 
 | 100 | 			return i; | 
 | 101 | 	} | 
| Anthony Baxter | 1149002 | 2006-04-11 05:39:14 +0000 | [diff] [blame] | 102 | 	ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label, | 
| Neal Norwitz | 2c4e4f9 | 2006-04-10 06:42:25 +0000 | [diff] [blame] | 103 | 					sizeof(label) * (ll->ll_nlabels + 1)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 104 | 	if (ll->ll_label == NULL) | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 105 | 		Py_FatalError("no mem to resize labellist in addlabel"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 106 | 	lb = &ll->ll_label[ll->ll_nlabels++]; | 
 | 107 | 	lb->lb_type = type; | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 108 | 	lb->lb_str = strdup(str); | 
 | 109 | 	if (Py_DebugFlag) | 
| Guido van Rossum | c696617 | 2004-03-20 22:34:14 +0000 | [diff] [blame] | 110 | 		printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 111 | 		       PyGrammar_LabelRepr(lb)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 112 | 	return lb - ll->ll_label; | 
 | 113 | } | 
 | 114 |  | 
 | 115 | /* Same, but rather dies than adds */ | 
 | 116 |  | 
 | 117 | int | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 118 | findlabel(labellist *ll, int type, char *str) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 119 | { | 
 | 120 | 	int i; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 121 | 	 | 
 | 122 | 	for (i = 0; i < ll->ll_nlabels; i++) { | 
 | 123 | 		if (ll->ll_label[i].lb_type == type /*&& | 
 | 124 | 			strcmp(ll->ll_label[i].lb_str, str) == 0*/) | 
 | 125 | 			return i; | 
 | 126 | 	} | 
 | 127 | 	fprintf(stderr, "Label %d/'%s' not found\n", type, str); | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 128 | 	Py_FatalError("grammar.c:findlabel()"); | 
| Guido van Rossum | fd8a393 | 1996-12-02 18:27:33 +0000 | [diff] [blame] | 129 | 	return 0; /* Make gcc -Wall happy */ | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 130 | } | 
 | 131 |  | 
| Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 132 | /* Forward */ | 
| Tim Peters | dbd9ba6 | 2000-07-09 03:09:57 +0000 | [diff] [blame] | 133 | static void translabel(grammar *, label *); | 
| Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 134 |  | 
 | 135 | void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 136 | translatelabels(grammar *g) | 
| Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 137 | { | 
 | 138 | 	int i; | 
| Guido van Rossum | 588633d | 1994-12-30 15:46:02 +0000 | [diff] [blame] | 139 |  | 
| Guido van Rossum | 408027e | 1996-12-30 16:17:54 +0000 | [diff] [blame] | 140 | #ifdef Py_DEBUG | 
| Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 141 | 	printf("Translating labels ...\n"); | 
| Guido van Rossum | 588633d | 1994-12-30 15:46:02 +0000 | [diff] [blame] | 142 | #endif | 
| Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 143 | 	/* Don't translate EMPTY */ | 
 | 144 | 	for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++) | 
 | 145 | 		translabel(g, &g->g_ll.ll_label[i]); | 
 | 146 | } | 
 | 147 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 148 | static void | 
| Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 149 | translabel(grammar *g, label *lb) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 150 | { | 
 | 151 | 	int i; | 
 | 152 | 	 | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 153 | 	if (Py_DebugFlag) | 
 | 154 | 		printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 155 | 	 | 
 | 156 | 	if (lb->lb_type == NAME) { | 
 | 157 | 		for (i = 0; i < g->g_ndfas; i++) { | 
 | 158 | 			if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 159 | 				if (Py_DebugFlag) | 
 | 160 | 					printf( | 
 | 161 | 					    "Label %s is non-terminal %d.\n", | 
 | 162 | 					    lb->lb_str, | 
 | 163 | 					    g->g_dfa[i].d_type); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 164 | 				lb->lb_type = g->g_dfa[i].d_type; | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 165 | 				free(lb->lb_str); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 166 | 				lb->lb_str = NULL; | 
 | 167 | 				return; | 
 | 168 | 			} | 
 | 169 | 		} | 
 | 170 | 		for (i = 0; i < (int)N_TOKENS; i++) { | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 171 | 			if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) { | 
 | 172 | 				if (Py_DebugFlag) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 173 | 					printf("Label %s is terminal %d.\n", | 
 | 174 | 						lb->lb_str, i); | 
 | 175 | 				lb->lb_type = i; | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 176 | 				free(lb->lb_str); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 177 | 				lb->lb_str = NULL; | 
 | 178 | 				return; | 
 | 179 | 			} | 
 | 180 | 		} | 
 | 181 | 		printf("Can't translate NAME label '%s'\n", lb->lb_str); | 
 | 182 | 		return; | 
 | 183 | 	} | 
 | 184 | 	 | 
 | 185 | 	if (lb->lb_type == STRING) { | 
| Neal Norwitz | 30b5c5d | 2005-12-19 06:05:18 +0000 | [diff] [blame] | 186 | 		if (isalpha(Py_CHARMASK(lb->lb_str[1])) || | 
 | 187 | 		    lb->lb_str[1] == '_') { | 
| Guido van Rossum | 1d5735e | 1994-08-30 08:27:36 +0000 | [diff] [blame] | 188 | 			char *p; | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 189 | 			char *src; | 
 | 190 | 			char *dest; | 
 | 191 | 			size_t name_len; | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 192 | 			if (Py_DebugFlag) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 193 | 				printf("Label %s is a keyword\n", lb->lb_str); | 
 | 194 | 			lb->lb_type = NAME; | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 195 | 			src = lb->lb_str + 1; | 
 | 196 | 			p = strchr(src, '\''); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 197 | 			if (p) | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 198 | 				name_len = p - src; | 
 | 199 | 			else | 
 | 200 | 				name_len = strlen(src); | 
| Anthony Baxter | 1149002 | 2006-04-11 05:39:14 +0000 | [diff] [blame] | 201 | 			dest = (char *)malloc(name_len + 1); | 
| Neal Norwitz | 9ac8953 | 2006-08-13 18:13:36 +0000 | [diff] [blame] | 202 | 			if (!dest) { | 
 | 203 | 				printf("Can't alloc dest '%s'\n", src); | 
 | 204 | 				return; | 
 | 205 | 			} | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 206 | 			strncpy(dest, src, name_len); | 
 | 207 | 			dest[name_len] = '\0'; | 
 | 208 | 			free(lb->lb_str); | 
 | 209 | 			lb->lb_str = dest; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 210 | 		} | 
| Guido van Rossum | c64d04d | 1991-10-20 20:20:00 +0000 | [diff] [blame] | 211 | 		else if (lb->lb_str[2] == lb->lb_str[0]) { | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 212 | 			int type = (int) PyToken_OneChar(lb->lb_str[1]); | 
| Guido van Rossum | c64d04d | 1991-10-20 20:20:00 +0000 | [diff] [blame] | 213 | 			if (type != OP) { | 
 | 214 | 				lb->lb_type = type; | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 215 | 				free(lb->lb_str); | 
| Guido van Rossum | c64d04d | 1991-10-20 20:20:00 +0000 | [diff] [blame] | 216 | 				lb->lb_str = NULL; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 217 | 			} | 
 | 218 | 			else | 
| Guido van Rossum | c64d04d | 1991-10-20 20:20:00 +0000 | [diff] [blame] | 219 | 				printf("Unknown OP label %s\n", | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 220 | 					lb->lb_str); | 
 | 221 | 		} | 
| Guido van Rossum | c64d04d | 1991-10-20 20:20:00 +0000 | [diff] [blame] | 222 | 		else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 223 | 			int type = (int) PyToken_TwoChars(lb->lb_str[1], | 
| Guido van Rossum | c64d04d | 1991-10-20 20:20:00 +0000 | [diff] [blame] | 224 | 						   lb->lb_str[2]); | 
 | 225 | 			if (type != OP) { | 
 | 226 | 				lb->lb_type = type; | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 227 | 				free(lb->lb_str); | 
| Guido van Rossum | c64d04d | 1991-10-20 20:20:00 +0000 | [diff] [blame] | 228 | 				lb->lb_str = NULL; | 
 | 229 | 			} | 
 | 230 | 			else | 
 | 231 | 				printf("Unknown OP label %s\n", | 
 | 232 | 					lb->lb_str); | 
 | 233 | 		} | 
| Thomas Wouters | 434d082 | 2000-08-24 20:11:32 +0000 | [diff] [blame] | 234 | 		else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { | 
 | 235 | 			int type = (int) PyToken_ThreeChars(lb->lb_str[1], | 
 | 236 | 							    lb->lb_str[2], | 
 | 237 | 							    lb->lb_str[3]); | 
 | 238 | 			if (type != OP) { | 
 | 239 | 				lb->lb_type = type; | 
| Guido van Rossum | d3ab37f | 2003-04-17 14:55:42 +0000 | [diff] [blame] | 240 | 				free(lb->lb_str); | 
| Thomas Wouters | 434d082 | 2000-08-24 20:11:32 +0000 | [diff] [blame] | 241 | 				lb->lb_str = NULL; | 
 | 242 | 			} | 
 | 243 | 			else | 
 | 244 | 				printf("Unknown OP label %s\n", | 
 | 245 | 					lb->lb_str); | 
 | 246 | 		} | 
| Guido van Rossum | c64d04d | 1991-10-20 20:20:00 +0000 | [diff] [blame] | 247 | 		else | 
 | 248 | 			printf("Can't translate STRING label %s\n", | 
 | 249 | 				lb->lb_str); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 250 | 	} | 
 | 251 | 	else | 
| Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 252 | 		printf("Can't translate label '%s'\n", | 
 | 253 | 		       PyGrammar_LabelRepr(lb)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 254 | } |