Blame - Parser/parser.c - platform/external/python/cpython3

blob: e0b3530f511b5b610d46ec5d194c175c2644d9c0 [file] [log] [blame]

Guido van Rossum	85a5fbb	1990-10-14 12:07:46 +0000	[diff] [blame]	1	/* Parser implementation */
				2
				3	/* For a description, see the comments at end of this file */
				4
				5	/* XXX To do: error recovery */
				6
				7	#include <stdio.h>
				8	#include "assert.h"
				9
				10	#include "PROTO.h"
				11	#include "malloc.h"
				12	#include "token.h"
				13	#include "grammar.h"
				14	#include "node.h"
				15	#include "parser.h"
				16	#include "errcode.h"
				17
				18	extern int debugging;
				19
				20	#ifdef DEBUG
				21	#define D(x) if (!debugging); else x
				22	#else
				23	#define D(x)
				24	#endif
				25
				26
				27	/* STACK DATA TYPE */
				28
				29	static void s_reset PROTO((stack *));
				30
				31	static void
				32	s_reset(s)
				33	stack *s;
				34	{
				35	s->s_top = &s->s_base[MAXSTACK];
				36	}
				37
				38	#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
				39
				40	static int s_push PROTO((stack , dfa , node *));
				41
				42	static int
				43	s_push(s, d, parent)
				44	register stack *s;
				45	dfa *d;
				46	node *parent;
				47	{
				48	register stackentry *top;
				49	if (s->s_top == s->s_base) {
				50	fprintf(stderr, "s_push: parser stack overflow\n");
				51	return -1;
				52	}
				53	top = --s->s_top;
				54	top->s_dfa = d;
				55	top->s_parent = parent;
				56	top->s_state = 0;
				57	return 0;
				58	}
				59
				60	#ifdef DEBUG
				61
				62	static void s_pop PROTO((stack *));
				63
				64	static void
				65	s_pop(s)
				66	register stack *s;
				67	{
				68	if (s_empty(s)) {
				69	fprintf(stderr, "s_pop: parser stack underflow -- FATAL\n");
				70	abort();
				71	}
				72	s->s_top++;
				73	}
				74
				75	#else /* !DEBUG */
				76
				77	#define s_pop(s) (s)->s_top++
				78
				79	#endif
				80
				81
				82	/* PARSER CREATION */
				83
				84	parser_state *
				85	newparser(g, start)
				86	grammar *g;
				87	int start;
				88	{
				89	parser_state *ps;
				90
				91	if (!g->g_accel)
				92	addaccelerators(g);
				93	ps = NEW(parser_state, 1);
				94	if (ps == NULL)
				95	return NULL;
				96	ps->p_grammar = g;
				97	ps->p_tree = newnode(start);
				98	if (ps->p_tree == NULL) {
				99	if (ps->p_tree != NULL)
				100	DEL(ps->p_tree); /* XXX freeing a node!?! */
				101	DEL(ps);
				102	return NULL;
				103	}
				104	s_reset(&ps->p_stack);
				105	(void) s_push(&ps->p_stack, finddfa(g, start), ps->p_tree);
				106	return ps;
				107	}
				108
				109	void
				110	delparser(ps)
				111	parser_state *ps;
				112	{
				113	DEL(ps);
				114	}
				115
				116
				117	/* PARSER STACK OPERATIONS */
				118
				119	static int shift PROTO((stack , int, char , int));
				120
				121	static int
				122	shift(s, type, str, newstate)
				123	register stack *s;
				124	int type;
				125	char *str;
				126	int newstate;
				127	{
				128	assert(!s_empty(s));
				129	if (addchild(s->s_top->s_parent, type, str) == NULL) {
				130	fprintf(stderr, "shift: no mem in addchild\n");
				131	return -1;
				132	}
				133	s->s_top->s_state = newstate;
				134	return 0;
				135	}
				136
				137	static int push PROTO((stack , int, dfa , int));
				138
				139	static int
				140	push(s, type, d, newstate)
				141	register stack *s;
				142	int type;
				143	dfa *d;
				144	int newstate;
				145	{
				146	register node *n;
				147	n = s->s_top->s_parent;
				148	assert(!s_empty(s));
				149	if (addchild(n, type, (char *)NULL) == NULL) {
				150	fprintf(stderr, "push: no mem in addchild\n");
				151	return -1;
				152	}
				153	s->s_top->s_state = newstate;
				154	return s_push(s, d, CHILD(n, NCH(n)-1));
				155	}
				156
				157
				158	/* PARSER PROPER */
				159
				160	static int classify PROTO((grammar , int, char ));
				161
				162	static int
				163	classify(g, type, str)
				164	grammar *g;
				165	register int type;
				166	char *str;
				167	{
				168	register int n = g->g_ll.ll_nlabels;
				169
				170	if (type == NAME) {
				171	register char *s = str;
				172	register label *l = g->g_ll.ll_label;
				173	register int i;
				174	for (i = n; i > 0; i--, l++) {
				175	if (l->lb_type == NAME && l->lb_str != NULL &&
				176	l->lb_str[0] == s[0] &&
				177	strcmp(l->lb_str, s) == 0) {
				178	D(printf("It's a keyword\n"));
				179	return n - i;
				180	}
				181	}
				182	}
				183
				184	{
				185	register label *l = g->g_ll.ll_label;
				186	register int i;
				187	for (i = n; i > 0; i--, l++) {
				188	if (l->lb_type == type && l->lb_str == NULL) {
				189	D(printf("It's a token we know\n"));
				190	return n - i;
				191	}
				192	}
				193	}
				194
				195	D(printf("Illegal token\n"));
				196	return -1;
				197	}
				198
				199	int
				200	addtoken(ps, type, str)
				201	register parser_state *ps;
				202	register int type;
				203	char *str;
				204	{
				205	register int ilabel;
				206
				207	D(printf("Token %s/'%s' ... ", tok_name[type], str));
				208
				209	/* Find out which label this token is */
				210	ilabel = classify(ps->p_grammar, type, str);
				211	if (ilabel < 0)
				212	return E_SYNTAX;
				213
				214	/* Loop until the token is shifted or an error occurred */
				215	for (;;) {
				216	/* Fetch the current dfa and state */
				217	register dfa *d = ps->p_stack.s_top->s_dfa;
				218	register state *s = &d->d_state[ps->p_stack.s_top->s_state];
				219
				220	D(printf(" DFA '%s', state %d:",
				221	d->d_name, ps->p_stack.s_top->s_state));
				222
				223	/* Check accelerator */
				224	if (s->s_lower <= ilabel && ilabel < s->s_upper) {
				225	register int x = s->s_accel[ilabel - s->s_lower];
				226	if (x != -1) {
				227	if (x & (1<<7)) {
				228	/* Push non-terminal */
				229	int nt = (x >> 8) + NT_OFFSET;
				230	int arrow = x & ((1<<7)-1);
				231	dfa *d1 = finddfa(ps->p_grammar, nt);
				232	if (push(&ps->p_stack, nt, d1, arrow) < 0) {
				233	D(printf(" MemError: push.\n"));
				234	return E_NOMEM;
				235	}
				236	D(printf(" Push ...\n"));
				237	continue;
				238	}
				239
				240	/* Shift the token */
				241	if (shift(&ps->p_stack, type, str, x) < 0) {
				242	D(printf(" MemError: shift.\n"));
				243	return E_NOMEM;
				244	}
				245	D(printf(" Shift.\n"));
				246	/* Pop while we are in an accept-only state */
				247	while (s = &d->d_state
				248	[ps->p_stack.s_top->s_state],
				249	s->s_accept && s->s_narcs == 1) {
				250	D(printf(" Direct pop.\n"));
				251	s_pop(&ps->p_stack);
				252	if (s_empty(&ps->p_stack)) {
				253	D(printf(" ACCEPT.\n"));
				254	return E_DONE;
				255	}
				256	d = ps->p_stack.s_top->s_dfa;
				257	}
				258	return E_OK;
				259	}
				260	}
				261
				262	if (s->s_accept) {
				263	/* Pop this dfa and try again */
				264	s_pop(&ps->p_stack);
				265	D(printf(" Pop ...\n"));
				266	if (s_empty(&ps->p_stack)) {
				267	D(printf(" Error: bottom of stack.\n"));
				268	return E_SYNTAX;
				269	}
				270	continue;
				271	}
				272
				273	/* Stuck, report syntax error */
				274	D(printf(" Error.\n"));
				275	return E_SYNTAX;
				276	}
				277	}
				278
				279
				280	#ifdef DEBUG
				281
				282	/* DEBUG OUTPUT */
				283
				284	void
				285	dumptree(g, n)
				286	grammar *g;
				287	node *n;
				288	{
				289	int i;
				290
				291	if (n == NULL)
				292	printf("NIL");
				293	else {
				294	label l;
				295	l.lb_type = TYPE(n);
				296	l.lb_str = TYPE(str);
				297	printf("%s", labelrepr(&l));
				298	if (ISNONTERMINAL(TYPE(n))) {
				299	printf("(");
				300	for (i = 0; i < NCH(n); i++) {
				301	if (i > 0)
				302	printf(",");
				303	dumptree(g, CHILD(n, i));
				304	}
				305	printf(")");
				306	}
				307	}
				308	}
				309
				310	void
				311	showtree(g, n)
				312	grammar *g;
				313	node *n;
				314	{
				315	int i;
				316
				317	if (n == NULL)
				318	return;
				319	if (ISNONTERMINAL(TYPE(n))) {
				320	for (i = 0; i < NCH(n); i++)
				321	showtree(g, CHILD(n, i));
				322	}
				323	else if (ISTERMINAL(TYPE(n))) {
				324	printf("%s", tok_name[TYPE(n)]);
				325	if (TYPE(n) == NUMBER \|\| TYPE(n) == NAME)
				326	printf("(%s)", STR(n));
				327	printf(" ");
				328	}
				329	else
				330	printf("? ");
				331	}
				332
				333	void
				334	printtree(ps)
				335	parser_state *ps;
				336	{
				337	if (debugging) {
				338	printf("Parse tree:\n");
				339	dumptree(ps->p_grammar, ps->p_tree);
				340	printf("\n");
				341	printf("Tokens:\n");
				342	showtree(ps->p_grammar, ps->p_tree);
				343	printf("\n");
				344	}
				345	printf("Listing:\n");
				346	listtree(ps->p_tree);
				347	printf("\n");
				348	}
				349
				350	#endif /* DEBUG */
				351
				352	/*
				353
				354	Description
				355	-----------
				356
				357	The parser's interface is different than usual: the function addtoken()
				358	must be called for each token in the input. This makes it possible to
				359	turn it into an incremental parsing system later. The parsing system
				360	constructs a parse tree as it goes.
				361
				362	A parsing rule is represented as a Deterministic Finite-state Automaton
				363	(DFA). A node in a DFA represents a state of the parser; an arc represents
				364	a transition. Transitions are either labeled with terminal symbols or
				365	with non-terminals. When the parser decides to follow an arc labeled
				366	with a non-terminal, it is invoked recursively with the DFA representing
				367	the parsing rule for that as its initial state; when that DFA accepts,
				368	the parser that invoked it continues. The parse tree constructed by the
				369	recursively called parser is inserted as a child in the current parse tree.
				370
				371	The DFA's can be constructed automatically from a more conventional
				372	language description. An extended LL(1) grammar (ELL(1)) is suitable.
				373	Certain restrictions make the parser's life easier: rules that can produce
				374	the empty string should be outlawed (there are other ways to put loops
				375	or optional parts in the language). To avoid the need to construct
				376	FIRST sets, we can require that all but the last alternative of a rule
				377	(really: arc going out of a DFA's state) must begin with a terminal
				378	symbol.
				379
				380	As an example, consider this grammar:
				381
				382	expr: term (OP term)*
				383	term: CONSTANT \| '(' expr ')'
				384
				385	The DFA corresponding to the rule for expr is:
				386
				387	------->.---term-->.------->
				388	^ \|
				389	\| \|
				390	\----OP----/
				391
				392	The parse tree generated for the input a+b is:
				393
				394	(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
				395
				396	*/