Blame - src/mesa/shader/grammar.c - platform/external/mesa3d

blob: 964497845710886643d265cabbfcffb8471d2a28 [file] [log] [blame]

Michal Krol	0e7b1d8	2004-03-03 18:10:40 +0000	[diff] [blame]	1	#ifndef GRAMMAR_PORT_BUILD
				2	#error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
				3	#endif
				4
				5	/*
				6	Last Modified: 2004-II-8
				7	*/
				8
				9	/*
				10	INTRODUCTION
				11	------------
				12
				13	The task is to check the syntax of an input string. Input string is a stream of ASCII
				14	characters terminated with a null-character ('\0'). Checking it using C language is
				15	difficult and hard to implement without bugs. It is hard to maintain and make changes when
				16	the syntax changes.
				17
				18	This is because of a high redundancy of the C code. Large blocks of code are duplicated with
				19	only small changes. Even use of macros does not solve the problem because macros cannot
				20	erase the complexity of the problem.
				21
				22	The resolution is to create a new language that will be highly oriented to our task. Once
				23	we describe a particular syntax, we are done. We can then focus on the code that implements
				24	the language. The size and complexity of it is relatively small compared to the code that
				25	directly checks the syntax.
				26
				27	First, we must implement our new language. Here, the language is implemented in C, but it
				28	could also be implemented in any other language. The code is listed below. We must take
				29	a good care that it is bug free. This is simple because the code is simple and clean.
				30
				31	Next, we must describe the syntax of our new language in itself. Once created and checked
				32	manually that it is correct, we can use it to check other scripts.
				33
				34	Note that our new language loading code does not have to check the syntax. It is because we
				35	assume that the script describing itself is correct, and other scripts can be syntactically
				36	checked by the former script. The loading code must only do semantic checking which leads us to
				37	simple resolving references.
				38
				39	THE LANGUAGE
				40	------------
				41
				42	Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
				43	sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
				44	which is an identifier, and its definition. A definition is in turn a sequence of specifiers
				45	connected with ".and" or ".or" operator. These operators cannot be mixed together in a one
				46	definition. Specifier can be a symbol, string, character, character range or a special
				47	keyword ".true" or ".false".
				48
				49	At the very beginning of the script there is a declaration of the root symbol, in the form:
				50	.syntax <root_symbol>;
				51	The <root_symbol> must be one of the symbols in the declaration sequence. The syntax is correct if
				52	the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
				53	the symbol evaluates to true. Definition evaluation depends on the operator used to connect
				54	specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
				55	only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
				56	true if any of the specifiers evaluates to true. If definition contains only one specifier,
				57	it is evaluated as if it was connected with ".true" keyword by ".and" operator.
				58
				59	If specifier is a ".true" keyword, it always evaluates to true.
				60
				61	If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
				62	when it does not evaluate to true.
				63
				64	Character range specifier is in the form:
				65	'<first_character>' - '<second_character>'
				66	If specifier is a character range, it evaluates to true if character in the stream is greater
				67	or equal to <first_character> and less or equal to <second_character>. In that situation
				68	the stream pointer is advanced to point to next character in the stream. All C-style escape
				69	sequences are supported although trigraph sequences are not. The comparisons are performed
				70	on 8-bit unsigned integers.
				71
				72	Character specifier is in the form:
				73	'<single_character>'
				74	It evaluates to true if the following character range specifier evaluates to true:
				75	'<single_character>' - '<single_character>'
				76
				77	String specifier is in the form:
				78	"<string>"
				79	Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
				80	<string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
				81	the following character specifier evaluates to true:
				82	'<string>[i]'
				83	If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
				84
				85	Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
				86	.loop <symbol> (1)
				87	where <symbol> is defined as follows:
				88	<symbol> <definition>; (2)
				89	Construction (1) is replaced by the following code:
				90	<symbol$1>
				91	and declaration (2) is replaced by the following:
				92	<symbol$1> <symbol$2> .or .true;
				93	<symbol$2> <symbol> .and <symbol$1>;
				94	<symbol> <definition>;
				95
				96	ESCAPE SEQUENCES
				97	----------------
				98
				99	Synek supports all escape sequences in character specifiers. The mapping table is listed below.
				100	All occurrences of the characters in the first column are replaced with the corresponding
				101	character in the second column.
				102
				103	Escape sequence Represents
				104	------------------------------------------------------------------------------------------------
				105	\a Bell (alert)
				106	\b Backspace
				107	\f Formfeed
				108	\n New line
				109	\r Carriage return
				110	\t Horizontal tab
				111	\v Vertical tab
				112	\' Single quotation mark
				113	\" Double quotation mark
				114	\\ Backslash
				115	\? Literal question mark
				116	\ooo ASCII character in octal notation
				117	\xhhh ASCII character in hexadecimal notation
				118	------------------------------------------------------------------------------------------------
				119
				120	RAISING ERRORS
				121	--------------
				122
				123	Any specifier can be followed by a special construction that is executed when the specifier
				124	evaluates to false. The construction is in the form:
				125	.error <ERROR_TEXT>
				126	<ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
				127	in the form:
				128	.errtext <ERROR_TEXT> "<error_desc>"
				129	When specifier evaluates to false and this construction is present, parsing is stopped
				130	immediately and <error_desc> is returned as a result of parsing. The error position is also
				131	returned and it is meant as an offset from the beginning of the stream to the character that
				132	was valid so far. Example:
				133
				134	(** syntax script **)
				135
				136	.syntax program;
				137	.errtext MISSING_SEMICOLON "missing ';'"
				138	program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
				139	.loop space .and '\0';
				140	declaration "declare" .and .loop space .and identifier;
				141	space ' ';
				142
				143	(** sample code **)
				144
				145	declare foo ,
				146
				147	In the example above checking the sample code will result in error message "missing ';'" and
				148	error position 12. The sample code is not correct. Note the presence of '\0' specifier to
				149	assure that there is no code after semicolon - only spaces.
				150	<error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
				151	the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
				152	the identifier name. The starting position is the error position. The length of the resulting
				153	string is the position after invoking the symbol.
				154
				155	PRODUCTION
				156	----------
				157
				158	Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
				159	that evaluate to true. That is, every specifier and optional error construction can be followed
				160	by a number of emit constructions that are in the form:
				161	.emit <parameter>
				162	<parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
				163	0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
				164	in the form:
				165	.emtcode <identifier> <hex_number>
				166
				167	When given specifier evaluates to true, all emits associated with the specifier are output
				168	in order they were declared. A star means that last-read character should be output instead
				169	of constant value. Example:
				170
				171	(** syntax script **)
				172
				173	.syntax foobar;
				174	.emtcode WORD_FOO 0x01
				175	.emtcode WORD_BAR 0x02
				176	foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
				177	FOO "foo" .and SPACE;
				178	BAR "bar" .and SPACE;
				179	SPACE ' ' .or '\0';
				180
				181	(** sample text 1 **)
				182
				183	foo
				184
				185	(** sample text 2 **)
				186
				187	foobar
				188
				189	For both samples the result will be one-element array. For first sample text it will be
				190	value 1, for second - 0. Note that every text will be accepted because of presence of
				191	.true as an alternative.
				192
				193	Another example:
				194
				195	(** syntax script **)
				196
				197	.syntax declaration;
				198	.emtcode VARIABLE 0x01
				199	declaration "declare" .and .loop space .and
				200	identifier .emit VARIABLE .and (1)
				201	.true .emit 0x00 .and (2)
				202	.loop space .and ';';
				203	space ' ' .or '\t';
				204	identifier .loop id_char .emit *; (3)
				205	id_char 'a'-'z' .or 'A'-'Z' .or '_';
				206
				207	(** sample code **)
				208
				209	declare fubar;
				210
				211	In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
				212	true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
				213	to terminate the string with null to signal when the string ends. Specifier (3) outputs
				214	all characters that make declared identifier. The result of sample code will be the
				215	following array:
				216	{ 1, 'f', 'u', 'b', 'a', 'r', 0 }
				217
				218	If .emit is followed by dollar $, it means that current position should be output. Current
				219	position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
				220	first character consumed by the specifier associated with the .emit instruction. Current
				221	position is stored in the output buffer in Little-Endian convention (the lowest byte comes
				222	first).
				223	*/
				224
				225	static void mem_free (void **);
				226
				227	/*
				228	internal error messages
				229	*/
				230	static const byte OUT_OF_MEMORY = (byte ) "internal error 1001: out of physical memory";
				231	static const byte UNRESOLVED_REFERENCE = (byte ) "internal error 1002: unresolved reference '$'";
				232	static const byte INVALID_GRAMMAR_ID = (byte ) "internal error 1003: invalid grammar object";
				233	static const byte INVALID_REGISTER_NAME = (byte ) "internal error 1004: invalid register name: '$'";
				234
				235	static const byte *error_message = NULL;
				236	static byte error_param = NULL; / this is inserted into error_message in place of $ */
				237	static int error_position = -1;
				238
				239	static byte unknown = (byte ) "???";
				240
				241	static void clear_last_error ()
				242	{
				243	/* reset error message */
				244	error_message = NULL;
				245
				246	/* free error parameter - if error_param is a "???" don't free it - it's static */
				247	if (error_param != unknown)
				248	mem_free ((void **) &error_param);
				249	else
				250	error_param = NULL;
				251
				252	/* reset error position */
				253	error_position = -1;
				254	}
				255
				256	static void set_last_error (const byte msg, byte param, int pos)
				257	{
				258	/* error message can only be set only once */
				259	if (error_message != NULL)
				260	{
				261	mem_free (&param);
				262	return;
				263	}
				264
				265	error_message = msg;
				266
				267	if (param != NULL)
				268	error_param = param;
				269	else
				270	error_param = unknown;
				271
				272	error_position = pos;
				273	}
				274
				275	/*
				276	memory management routines
				277	*/
				278	static void *mem_alloc (size_t size)
				279	{
				280	void *ptr = grammar_alloc_malloc (size);
				281	if (ptr == NULL)
				282	set_last_error (OUT_OF_MEMORY, NULL, -1);
				283	return ptr;
				284	}
				285
				286	static void mem_copy (void dst, const void *src, size_t size)
				287	{
				288	return grammar_memory_copy (dst, src, size);
				289	}
				290
				291	static void mem_free (void **ptr)
				292	{
				293	grammar_alloc_free (*ptr);
				294	*ptr = NULL;
				295	}
				296
				297	static void mem_realloc (void ptr, size_t old_size, size_t new_size)
				298	{
				299	void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
				300	if (ptr2 == NULL)
				301	set_last_error (OUT_OF_MEMORY, NULL, -1);
				302	return ptr2;
				303	}
				304
				305	static byte str_copy_n (byte dst, const byte *src, size_t max_len)
				306	{
				307	return grammar_string_copy_n (dst, src, max_len);
				308	}
				309
				310	static byte str_duplicate (const byte str)
				311	{
				312	byte *new_str = grammar_string_duplicate (str);
				313	if (new_str == NULL)
				314	set_last_error (OUT_OF_MEMORY, NULL, -1);
				315	return new_str;
				316	}
				317
				318	static int str_equal (const byte str1, const byte str2)
				319	{
				320	return grammar_string_compare (str1, str2) == 0;
				321	}
				322
				323	static int str_equal_n (const byte str1, const byte str2, unsigned int n)
				324	{
				325	return grammar_string_compare_n (str1, str2, n) == 0;
				326	}
				327
				328	static unsigned int str_length (const byte *str)
				329	{
				330	return grammar_string_length (str);
				331	}
				332
				333	/*
				334	string to byte map typedef
				335	*/
				336	typedef struct map_byte_
				337	{
				338	byte *key;
				339	byte data;
				340	struct map_byte_ *next;
				341	} map_byte;
				342
				343	static void map_byte_create (map_byte **ma)
				344	{
				345	*ma = mem_alloc (sizeof (map_byte));
				346	if (*ma)
				347	{
				348	(**ma).key = NULL;
				349	(**ma).data = '\0';
				350	(**ma).next = NULL;
				351	}
				352	}
				353
				354	/* XXX unfold the recursion */
				355	static void map_byte_destroy (map_byte **ma)
				356	{
				357	if (*ma)
				358	{
				359	map_byte_destroy (&(**ma).next);
				360	mem_free ((void ) &(ma).key);
				361	mem_free ((void **) ma);
				362	}
				363	}
				364
				365	static void map_byte_append (map_byte ma, map_byte nm)
				366	{
				367	while (*ma)
				368	ma = &(**ma).next;
				369	ma = nm;
				370	}
				371
				372	/*
				373	searches the map for the specified key,
				374	returns pointer to the element with the specified key if it exists
				375	returns NULL otherwise
				376	*/
				377	map_byte map_byte_locate (map_byte ma, const byte key)
				378	{
				379	while (*ma)
				380	{
				381	if (str_equal ((**ma).key, key))
				382	return *ma;
				383
				384	ma = &(**ma).next;
				385	}
				386
				387	set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
				388	return NULL;
				389	}
				390
				391	/*
				392	searches the map for specified key,
				393	if the key is matched, *data is filled with data associated with the key,
				394	returns 0 if the key is matched,
				395	returns 1 otherwise
				396	*/
				397	static int map_byte_find (map_byte *ma, const byte key, byte *data)
				398	{
				399	map_byte *found = map_byte_locate (ma, key);
				400	if (found != NULL)
				401	{
				402	*data = found->data;
				403
				404	return 0;
				405	}
				406
				407	return 1;
				408	}
				409
				410	/*
				411	regbyte context typedef
				412
				413	Each regbyte consists of its name and a default value. These are static and created at
				414	grammar script compile-time, for example the following line:
				415	.regbyte vertex_blend 0x00
				416	adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
				417	When the script is executed, this regbyte can be accessed by name for read and write. When a
				418	particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
				419	stack. The new entry contains information abot which regbyte it references and its new value.
				420	When a given regbyte is accessed for read, the stack is searched top-down to find an
				421	entry that references the regbyte. The first matching entry is used to return the current
				422	value it holds. If no entry is found, the default value is returned.
				423	*/
				424	typedef struct regbyte_ctx_
				425	{
				426	map_byte *m_regbyte;
				427	byte m_current_value;
				428	struct regbyte_ctx_ *m_prev;
				429	} regbyte_ctx;
				430
				431	static void regbyte_ctx_create (regbyte_ctx **re)
				432	{
				433	*re = mem_alloc (sizeof (regbyte_ctx));
				434	if (*re)
				435	{
				436	(**re).m_regbyte = NULL;
				437	(**re).m_prev = NULL;
				438	}
				439	}
				440
				441	static void regbyte_ctx_destroy (regbyte_ctx **re)
				442	{
				443	if (*re)
				444	{
				445	mem_free ((void **) re);
				446	}
				447	}
				448
				449	static byte regbyte_ctx_extract (regbyte_ctx *re, map_byte reg)
				450	{
				451	/* first lookup in the register stack */
				452	while (*re != NULL)
				453	{
				454	if ((**re).m_regbyte == reg)
				455	return (**re).m_current_value;
				456
				457	re = &(**re).m_prev;
				458	}
				459
				460	/* if not found - return the default value */
				461	return reg->data;
				462	}
				463
				464	/*
				465	emit type typedef
				466	*/
				467	typedef enum emit_type_
				468	{
				469	et_byte, /* explicit number */
				470	et_stream, /* eaten character */
				471	et_position /* current position */
				472	} emit_type;
				473
				474	/*
				475	emit destination typedef
				476	*/
				477	typedef enum emit_dest_
				478	{
				479	ed_output, /* write to the output buffer */
				480	ed_regbyte /* write a particular regbyte */
				481	} emit_dest;
				482
				483	/*
				484	emit typedef
				485	*/
				486	typedef struct emit_
				487	{
				488	emit_dest m_emit_dest;
				489	emit_type m_emit_type; /* ed_output */
				490	byte m_byte; /* et_byte */
				491	map_byte m_regbyte; / ed_regbyte */
				492	byte m_regname; / ed_regbyte - temporary */
				493	struct emit_ *m_next;
				494	} emit;
				495
				496	static void emit_create (emit **em)
				497	{
				498	*em = mem_alloc (sizeof (emit));
				499	if (*em)
				500	{
				501	(**em).m_emit_dest = ed_output;
				502	(**em).m_emit_type = et_byte;
				503	(**em).m_byte = '\0';
				504	(**em).m_regbyte = NULL;
				505	(**em).m_regname = NULL;
				506	(**em).m_next = NULL;
				507	}
				508	}
				509
				510	static void emit_destroy (emit **em)
				511	{
				512	if (*em)
				513	{
				514	emit_destroy (&(**em).m_next);
				515	mem_free ((void ) &(em).m_regname);
				516	mem_free ((void **) em);
				517	}
				518	}
				519
				520	/*
				521	error typedef
				522	*/
				523	typedef struct error_
				524	{
				525	byte *m_text;
				526	byte *m_token_name;
				527	struct rule_ *m_token;
				528	} error;
				529
				530	static void error_create (error **er)
				531	{
				532	*er = mem_alloc (sizeof (error));
				533	if (*er)
				534	{
				535	(**er).m_text = NULL;
				536	(**er).m_token_name = NULL;
				537	(**er).m_token = NULL;
				538	}
				539	}
				540
				541	static void error_destroy (error **er)
				542	{
				543	if (*er)
				544	{
				545	mem_free ((void ) &(er).m_text);
				546	mem_free ((void ) &(er).m_token_name);
				547	mem_free ((void **) er);
				548	}
				549	}
				550
				551	struct dict_;
				552	static byte error_get_token (error , struct dict_ , const byte , unsigned int);
				553
				554	/*
				555	condition operand type typedef
				556	*/
				557	typedef enum cond_oper_type_
				558	{
				559	cot_byte, /* constant 8-bit unsigned integer */
				560	cot_regbyte /* pointer to byte register containing the current value */
				561	} cond_oper_type;
				562
				563	/*
				564	condition operand typedef
				565	*/
				566	typedef struct cond_oper_
				567	{
				568	cond_oper_type m_type;
				569	byte m_byte; /* cot_byte */
				570	map_byte m_regbyte; / cot_regbyte */
				571	byte m_regname; / cot_regbyte - temporary */
				572	} cond_oper;
				573
				574	/*
				575	condition type typedef
				576	*/
				577	typedef enum cond_type_
				578	{
				579	ct_equal,
				580	ct_not_equal
				581	} cond_type;
				582
				583	/*
				584	condition typedef
				585	*/
				586	typedef struct cond_
				587	{
				588	cond_type m_type;
				589	cond_oper m_operands[2];
				590	} cond;
				591
				592	static void cond_create (cond **co)
				593	{
				594	*co = mem_alloc (sizeof (cond));
				595	if (*co)
				596	{
				597	(**co).m_operands[0].m_regname = NULL;
				598	(**co).m_operands[1].m_regname = NULL;
				599	}
				600	}
				601
				602	static void cond_destroy (cond **co)
				603	{
				604	if (*co)
				605	{
				606	mem_free ((void ) &(co).m_operands[0].m_regname);
				607	mem_free ((void ) &(co).m_operands[1].m_regname);
				608	mem_free ((void **) co);
				609	}
				610	}
				611
				612	/*
				613	specifier type typedef
				614	*/
				615	typedef enum spec_type_
				616	{
				617	st_false,
				618	st_true,
				619	st_byte,
				620	st_byte_range,
				621	st_string,
				622	st_identifier,
				623	st_identifier_loop,
				624	st_debug
				625	} spec_type;
				626
				627	/*
				628	specifier typedef
				629	*/
				630	typedef struct spec_
				631	{
				632	spec_type m_spec_type;
				633	byte m_byte[2]; /* st_byte, st_byte_range */
				634	byte m_string; / st_string */
				635	struct rule_ m_rule; / st_identifier, st_identifier_loop */
				636	emit *m_emits;
				637	error *m_errtext;
				638	cond *m_cond;
				639	struct spec_ *m_next;
				640	} spec;
				641
				642	static void spec_create (spec **sp)
				643	{
				644	*sp = mem_alloc (sizeof (spec));
				645	if (*sp)
				646	{
				647	(**sp).m_spec_type = st_false;
				648	(**sp).m_byte[0] = '\0';
				649	(**sp).m_byte[1] = '\0';
				650	(**sp).m_string = NULL;
				651	(**sp).m_rule = NULL;
				652	(**sp).m_emits = NULL;
				653	(**sp).m_errtext = NULL;
				654	(**sp).m_cond = NULL;
				655	(**sp).m_next = NULL;
				656	}
				657	}
				658
				659	static void spec_destroy (spec **sp)
				660	{
				661	if (*sp)
				662	{
				663	spec_destroy (&(**sp).m_next);
				664	emit_destroy (&(**sp).m_emits);
				665	error_destroy (&(**sp).m_errtext);
				666	mem_free ((void ) &(sp).m_string);
				667	cond_destroy (&(**sp).m_cond);
				668	mem_free ((void **) sp);
				669	}
				670	}
				671
				672	static void spec_append (spec sp, spec ns)
				673	{
				674	while (*sp)
				675	sp = &(**sp).m_next;
				676	sp = ns;
				677	}
				678
				679	/*
				680	operator typedef
				681	*/
				682	typedef enum oper_
				683	{
				684	op_none,
				685	op_and,
				686	op_or
				687	} oper;
				688
				689	/*
				690	rule typedef
				691	*/
				692	typedef struct rule_
				693	{
				694	oper m_oper;
				695	spec *m_specs;
				696	struct rule_ *m_next;
				697	/* int m_referenced; / / for debugging purposes */
				698	} rule;
				699
				700	static void rule_create (rule **ru)
				701	{
				702	*ru = mem_alloc (sizeof (rule));
				703	if (*ru)
				704	{
				705	(**ru).m_oper = op_none;
				706	(**ru).m_specs = NULL;
				707	(**ru).m_next = NULL;
				708	/* (*ru).m_referenced = 0; /
				709	}
				710	}
				711
				712	static void rule_destroy (rule **ru)
				713	{
				714	if (*ru)
				715	{
				716	rule_destroy (&(**ru).m_next);
				717	spec_destroy (&(**ru).m_specs);
				718	mem_free ((void **) ru);
				719	}
				720	}
				721
				722	static void rule_append (rule ru, rule nr)
				723	{
				724	while (*ru)
				725	ru = &(**ru).m_next;
				726	ru = nr;
				727	}
				728
				729	/*
				730	returns unique grammar id
				731	*/
				732	static grammar next_valid_grammar_id ()
				733	{
				734	static grammar id = 0;
				735
				736	return ++id;
				737	}
				738
				739	/*
				740	dictionary typedef
				741	*/
				742	typedef struct dict_
				743	{
				744	rule *m_rulez;
				745	rule *m_syntax;
				746	rule *m_string;
				747	map_byte *m_regbytes;
				748	grammar m_id;
				749	struct dict_ *m_next;
				750	} dict;
				751
				752	static void dict_create (dict **di)
				753	{
				754	*di = mem_alloc (sizeof (dict));
				755	if (*di)
				756	{
				757	(**di).m_rulez = NULL;
				758	(**di).m_syntax = NULL;
				759	(**di).m_string = NULL;
				760	(**di).m_regbytes = NULL;
				761	(**di).m_id = next_valid_grammar_id ();
				762	(**di).m_next = NULL;
				763	}
				764	}
				765
				766	static void dict_destroy (dict **di)
				767	{
				768	if (*di)
				769	{
				770	rule_destroy (&(**di).m_rulez);
				771	map_byte_destroy (&(**di).m_regbytes);
				772	mem_free ((void **) di);
				773	}
				774	}
				775
				776	static void dict_append (dict di, dict nd)
				777	{
				778	while (*di)
				779	di = &(**di).m_next;
				780	di = nd;
				781	}
				782
				783	static void dict_find (dict di, grammar key, dict data)
				784	{
				785	while (*di)
				786	{
				787	if ((**di).m_id == key)
				788	{
				789	data = di;
				790	return;
				791	}
				792
				793	di = &(**di).m_next;
				794	}
				795
				796	*data = NULL;
				797	}
				798
				799	static dict *g_dicts = NULL;
				800
				801	/*
				802	byte array typedef
				803
				804	XXX this class is going to be replaced by a faster one, soon
				805	*/
				806	typedef struct barray_
				807	{
				808	byte *data;
				809	unsigned int len;
				810	} barray;
				811
				812	static void barray_create (barray **ba)
				813	{
				814	*ba = mem_alloc (sizeof (barray));
				815	if (*ba)
				816	{
				817	(**ba).data = NULL;
				818	(**ba).len = 0;
				819	}
				820	}
				821
				822	static void barray_destroy (barray **ba)
				823	{
				824	if (*ba)
				825	{
				826	mem_free ((void ) &(ba).data);
				827	mem_free ((void **) ba);
				828	}
				829	}
				830
				831	/*
				832	reallocates byte array to requested size,
				833	returns 0 on success,
				834	returns 1 otherwise
				835	*/
				836	static int barray_resize (barray **ba, unsigned int nlen)
				837	{
				838	byte *new_pointer;
				839
				840	if (nlen == 0)
				841	{
				842	mem_free ((void ) &(ba).data);
				843	(**ba).data = NULL;
				844	(**ba).len = 0;
				845
				846	return 0;
				847	}
				848	else
				849	{
				850	new_pointer = mem_realloc ((ba).data, (ba).len * sizeof (byte), nlen * sizeof (byte));
				851	if (new_pointer)
				852	{
				853	(**ba).data = new_pointer;
				854	(**ba).len = nlen;
				855
				856	return 0;
				857	}
				858	}
				859
				860	return 1;
				861	}
				862
				863	/*
				864	adds byte array pointed by nb to the end of array pointed by ba,
				865	returns 0 on success,
				866	returns 1 otherwise
				867	*/
				868	static int barray_append (barray ba, barray nb)
				869	{
				870	const unsigned int len = (**ba).len;
				871
				872	if (barray_resize (ba, (ba).len + (nb).len))
				873	return 1;
				874
				875	mem_copy ((ba).data + len, (nb).data, (**nb).len);
				876
				877	return 0;
				878	}
				879
				880	/*
				881	adds emit chain pointed by em to the end of array pointed by *ba,
				882	returns 0 on success,
				883	returns 1 otherwise
				884	*/
				885	static int barray_push (barray *ba, emit em, byte c, unsigned int pos, regbyte_ctx **rbc)
				886	{
				887	emit *temp = em;
				888	unsigned int count = 0;
				889
				890	while (temp)
				891	{
				892	if (temp->m_emit_dest == ed_output)
				893	if (temp->m_emit_type == et_position)
				894	count += 4; /* position is a 32-bit unsigned integer */
				895	else
				896	count++;
				897
				898	temp = temp->m_next;
				899	}
				900
				901	if (barray_resize (ba, (**ba).len + count))
				902	return 1;
				903
				904	while (em)
				905	{
				906	if (em->m_emit_dest == ed_output)
				907	{
				908	if (em->m_emit_type == et_byte)
				909	(ba).data[(ba).len - count--] = em->m_byte;
				910	else if (em->m_emit_type == et_stream)
				911	(ba).data[(ba).len - count--] = c;
				912	else // em->type == et_position
				913	(ba).data[(ba).len - count--] = (byte) pos,
				914	(ba).data[(ba).len - count--] = (byte) (pos >> 8),
				915	(ba).data[(ba).len - count--] = (byte) (pos >> 16),
				916	(ba).data[(ba).len - count--] = (byte) (pos >> 24);
				917	}
				918	else
				919	{
				920	regbyte_ctx *new_rbc;
				921	regbyte_ctx_create (&new_rbc);
				922	if (new_rbc == NULL)
				923	return 1;
				924
				925	new_rbc->m_prev = *rbc;
				926	new_rbc->m_regbyte = em->m_regbyte;
				927	*rbc = new_rbc;
				928
				929	if (em->m_emit_type == et_byte)
				930	new_rbc->m_current_value = em->m_byte;
				931	else if (em->m_emit_type == et_stream)
				932	new_rbc->m_current_value = c;
				933	}
				934
				935	em = em->m_next;
				936	}
				937
				938	return 0;
				939	}
				940
				941	/*
				942	string to string map typedef
				943	*/
				944	typedef struct map_str_
				945	{
				946	byte *key;
				947	byte *data;
				948	struct map_str_ *next;
				949	} map_str;
				950
				951	static void map_str_create (map_str **ma)
				952	{
				953	*ma = mem_alloc (sizeof (map_str));
				954	if (*ma)
				955	{
				956	(**ma).key = NULL;
				957	(**ma).data = NULL;
				958	(**ma).next = NULL;
				959	}
				960	}
				961
				962	static void map_str_destroy (map_str **ma)
				963	{
				964	if (*ma)
				965	{
				966	map_str_destroy (&(**ma).next);
				967	mem_free ((void ) &(ma).key);
				968	mem_free ((void ) &(ma).data);
				969	mem_free ((void **) ma);
				970	}
				971	}
				972
				973	static void map_str_append (map_str ma, map_str nm)
				974	{
				975	while (*ma)
				976	ma = &(**ma).next;
				977	ma = nm;
				978	}
				979
				980	/*
				981	searches the map for specified key,
				982	if the key is matched, *data is filled with data associated with the key,
				983	returns 0 if the key is matched,
				984	returns 1 otherwise
				985	*/
				986	static int map_str_find (map_str *ma, const byte key, byte **data)
				987	{
				988	while (*ma)
				989	{
				990	if (str_equal ((**ma).key, key))
				991	{
				992	data = str_duplicate ((*ma).data);
				993	if (*data == NULL)
				994	return 1;
				995
				996	return 0;
				997	}
				998
				999	ma = &(**ma).next;
				1000	}
				1001
				1002	set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
				1003	return 1;
				1004	}
				1005
				1006	/*
				1007	string to rule map typedef
				1008	*/
				1009	typedef struct map_rule_
				1010	{
				1011	byte *key;
				1012	rule *data;
				1013	struct map_rule_ *next;
				1014	} map_rule;
				1015
				1016	static void map_rule_create (map_rule **ma)
				1017	{
				1018	*ma = mem_alloc (sizeof (map_rule));
				1019	if (*ma)
				1020	{
				1021	(**ma).key = NULL;
				1022	(**ma).data = NULL;
				1023	(**ma).next = NULL;
				1024	}
				1025	}
				1026
				1027	static void map_rule_destroy (map_rule **ma)
				1028	{
				1029	if (*ma)
				1030	{
				1031	map_rule_destroy (&(**ma).next);
				1032	mem_free ((void ) &(ma).key);
				1033	mem_free ((void **) ma);
				1034	}
				1035	}
				1036
				1037	static void map_rule_append (map_rule ma, map_rule nm)
				1038	{
				1039	while (*ma)
				1040	ma = &(**ma).next;
				1041	ma = nm;
				1042	}
				1043
				1044	/*
				1045	searches the map for specified key,
				1046	if the key is matched, *data is filled with data associated with the key,
				1047	returns 0 if the is matched,
				1048	returns 1 otherwise
				1049	*/
				1050	static int map_rule_find (map_rule *ma, const byte key, rule **data)
				1051	{
				1052	while (*ma)
				1053	{
				1054	if (str_equal ((**ma).key, key))
				1055	{
				1056	data = (*ma).data;
				1057
				1058	return 0;
				1059	}
				1060
				1061	ma = &(**ma).next;
				1062	}
				1063
				1064	set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
				1065	return 1;
				1066	}
				1067
				1068	/*
				1069	returns 1 if given character is a white space,
				1070	returns 0 otherwise
				1071	*/
				1072	static int is_space (byte c)
				1073	{
				1074	return c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\r';
				1075	}
				1076
				1077	/*
				1078	advances text pointer by 1 if character pointed by *text is a space,
				1079	returns 1 if a space has been eaten,
				1080	returns 0 otherwise
				1081	*/
				1082	static int eat_space (const byte **text)
				1083	{
				1084	if (is_space (**text))
				1085	{
				1086	(*text)++;
				1087
				1088	return 1;
				1089	}
				1090
				1091	return 0;
				1092	}
				1093
				1094	/*
				1095	returns 1 if text points to C-style comment start string "/*",
				1096	returns 0 otherwise
				1097	*/
				1098	static int is_comment_start (const byte *text)
				1099	{
				1100	return text[0] == '/' && text[1] == '*';
				1101	}
				1102
				1103	/*
				1104	advances text pointer to first character after C-style comment block - if any,
				1105	returns 1 if C-style comment block has been encountered and eaten,
				1106	returns 0 otherwise
				1107	*/
				1108	static int eat_comment (const byte **text)
				1109	{
				1110	if (is_comment_start (*text))
				1111	{
				1112	/* text points to comment block - skip two characters to enter comment body /
				1113	*text += 2;
				1114	/* skip any character except consecutive '' and '/' /
				1115	while (!((text)[0] == '' && (*text)[1] == '/'))
				1116	(*text)++;
				1117	/* skip those two terminating characters */
				1118	*text += 2;
				1119
				1120	return 1;
				1121	}
				1122
				1123	return 0;
				1124	}
				1125
				1126	/*
				1127	advances text pointer to first character that is neither space nor C-style comment block
				1128	*/
				1129	static void eat_spaces (const byte **text)
				1130	{
				1131	while (eat_space (text) \|\| eat_comment (text))
				1132	;
				1133	}
				1134
				1135	/*
				1136	resizes string pointed by *ptr to successfully add character c to the end of the string,
				1137	returns 0 on success,
				1138	returns 1 otherwise
				1139	*/
				1140	static int string_grow (byte *ptr, unsigned int len, byte c)
				1141	{
				1142	/* reallocate the string in 16-byte increments */
				1143	if ((len & 0x0F) == 0x0F \|\| ptr == NULL)
				1144	{
				1145	byte tmp = mem_realloc (ptr, ((len + 1) & ~0x0F) sizeof (byte),
				1146	((len + 1 + 0x10) & ~0x0F) sizeof (byte));
				1147	if (tmp == NULL)
				1148	return 1;
				1149
				1150	*ptr = tmp;
				1151	}
				1152
				1153	if (c)
				1154	{
				1155	/* append given character */
				1156	(ptr)[len] = c;
				1157	(*len)++;
				1158	}
				1159	(ptr)[len] = '\0';
				1160
				1161	return 0;
				1162	}
				1163
				1164	/*
				1165	returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
				1166	returns 0 otherwise
				1167	*/
				1168	static int is_identifier (byte c)
				1169	{
				1170	return (c >= 'a' && c <= 'z') \|\| (c >= 'A' && c <= 'Z') \|\| (c >= '0' && c <= '9') \|\| c == '_';
				1171	}
				1172
				1173	/*
				1174	copies characters from text to id until non-identifier character is encountered,
				1175	assumes that *id points to NULL object - caller is responsible for later freeing the string,
				1176	text pointer is advanced to point past the copied identifier,
				1177	returns 0 if identifier was successfully copied,
				1178	returns 1 otherwise
				1179	*/
				1180	static int get_identifier (const byte text, byte id)
				1181	{
				1182	const byte t = text;
				1183	byte *p = NULL;
				1184	unsigned int len = 0;
				1185
				1186	if (string_grow (&p, &len, '\0'))
				1187	return 1;
				1188
				1189	/* loop while next character in buffer is valid for identifiers */
				1190	while (is_identifier (*t))
				1191	{
				1192	if (string_grow (&p, &len, *t++))
				1193	{
				1194	mem_free ((void **) &p);
				1195	return 1;
				1196	}
				1197	}
				1198
				1199	*text = t;
				1200	*id = p;
				1201
				1202	return 0;
				1203	}
				1204
				1205	/*
				1206	returns 1 if given character is HEX digit 0-9, A-F or a-f,
				1207	returns 0 otherwise
				1208	*/
				1209	static int is_hex (byte c)
				1210	{
				1211	return (c >= '0' && c <= '9') \|\| (c >= 'A' && c <= 'F') \|\| (c >= 'a' && c <= 'f');
				1212	}
				1213
				1214	/*
				1215	returns value of passed character as if it was HEX digit
				1216	*/
				1217	static unsigned int hex2dec (byte c)
				1218	{
				1219	if (c >= '0' && c <= '9')
				1220	return c - '0';
				1221	if (c >= 'A' && c <= 'F')
				1222	return c - 'A' + 10;
				1223	return c - 'a' + 10;
				1224	}
				1225
				1226	/*
				1227	converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
				1228	advances text pointer past the converted sequence,
				1229	returns the converted value
				1230	*/
				1231	static unsigned int hex_convert (const byte **text)
				1232	{
				1233	unsigned int value = 0;
				1234
				1235	while (is_hex (**text))
				1236	{
				1237	value = value * 0x10 + hex2dec (**text);
				1238	(*text)++;
				1239	}
				1240
				1241	return value;
				1242	}
				1243
				1244	/*
				1245	returns 1 if given character is OCT digit 0-7,
				1246	returns 0 otherwise
				1247	*/
				1248	static int is_oct (byte c)
				1249	{
				1250	return c >= '0' && c <= '7';
				1251	}
				1252
				1253	/*
				1254	returns value of passed character as if it was OCT digit
				1255	*/
				1256	static int oct2dec (byte c)
				1257	{
				1258	return c - '0';
				1259	}
				1260
				1261	static byte get_escape_sequence (const byte **text)
				1262	{
				1263	int value = 0;
				1264
				1265	/* skip '\' character */
				1266	(*text)++;
				1267
				1268	switch ((text)++)
				1269	{
				1270	case '\'':
				1271	return '\'';
				1272	case '"':
				1273	return '\"';
				1274	case '?':
				1275	return '\?';
				1276	case '\\':
				1277	return '\\';
				1278	case 'a':
				1279	return '\a';
				1280	case 'b':
				1281	return '\b';
				1282	case 'f':
				1283	return '\f';
				1284	case 'n':
				1285	return '\n';
				1286	case 'r':
				1287	return '\r';
				1288	case 't':
				1289	return '\t';
				1290	case 'v':
				1291	return '\v';
				1292	case 'x':
				1293	return (byte) hex_convert (text);
				1294	}
				1295
				1296	(*text)--;
				1297	if (is_oct (**text))
				1298	{
				1299	value = oct2dec ((text)++);
				1300	if (is_oct (**text))
				1301	{
				1302	value = value * 010 + oct2dec ((text)++);
				1303	if (is_oct (**text))
				1304	value = value * 010 + oct2dec ((text)++);
				1305	}
				1306	}
				1307
				1308	return (byte) value;
				1309	}
				1310
				1311	/*
				1312	copies characters from text to str until " or ' character is encountered,
				1313	assumes that *str points to NULL object - caller is responsible for later freeing the string,
				1314	assumes that *text points to " or ' character that starts the string,
				1315	text pointer is advanced to point past the " or ' character,
				1316	returns 0 if string was successfully copied,
				1317	returns 1 otherwise
				1318	*/
				1319	static int get_string (const byte text, byte str)
				1320	{
				1321	const byte t = text;
				1322	byte *p = NULL;
				1323	unsigned int len = 0;
				1324	byte term_char;
				1325
				1326	if (string_grow (&p, &len, '\0'))
				1327	return 1;
				1328
				1329	/* read " or ' character that starts the string */
				1330	term_char = *t++;
				1331	/* while next character is not the terminating character */
				1332	while (t && t != term_char)
				1333	{
				1334	byte c;
				1335
				1336	if (*t == '\\')
				1337	c = get_escape_sequence (&t);
				1338	else
				1339	c = *t++;
				1340
				1341	if (string_grow (&p, &len, c))
				1342	{
				1343	mem_free ((void **) &p);
				1344	return 1;
				1345	}
				1346	}
				1347	/* skip " or ' character that ends the string */
				1348	t++;
				1349
				1350	*text = t;
				1351	*str = p;
				1352	return 0;
				1353	}
				1354
				1355	/*
				1356	gets emit code, the syntax is: ".emtcode" " " <symbol> " " ("0x" \| "0X") <hex_value>
				1357	assumes that *text already points to <symbol>,
				1358	returns 0 if emit code is successfully read,
				1359	returns 1 otherwise
				1360	*/
				1361	static int get_emtcode (const byte text, map_byte ma)
				1362	{
				1363	const byte t = text;
				1364	map_byte *m = NULL;
				1365
				1366	map_byte_create (&m);
				1367	if (m == NULL)
				1368	return 1;
				1369
				1370	if (get_identifier (&t, &m->key))
				1371	{
				1372	map_byte_destroy (&m);
				1373	return 1;
				1374	}
				1375	eat_spaces (&t);
				1376
				1377	if (*t == '\'')
				1378	{
				1379	byte *c;
				1380
				1381	if (get_string (&t, &c))
				1382	{
				1383	map_byte_destroy (&m);
				1384	return 1;
				1385	}
				1386
				1387	m->data = (byte) c[0];
				1388	mem_free ((void **) &c);
				1389	}
				1390	else
				1391	{
				1392	/* skip HEX "0x" or "0X" prefix */
				1393	t += 2;
				1394	m->data = (byte) hex_convert (&t);
				1395	}
				1396
				1397	eat_spaces (&t);
				1398
				1399	*text = t;
				1400	*ma = m;
				1401	return 0;
				1402	}
				1403
				1404	/*
				1405	gets regbyte declaration, the syntax is: ".regbyte" " " <symbol> " " ("0x" \| "0X") <hex_value>
				1406	assumes that *text already points to <symbol>,
				1407	returns 0 if regbyte is successfully read,
				1408	returns 1 otherwise
				1409	*/
				1410	static int get_regbyte (const byte text, map_byte ma)
				1411	{
				1412	return get_emtcode (text, ma);
				1413	}
				1414
				1415	/*
				1416	returns 0 on success,
				1417	returns 1 otherwise
				1418	*/
				1419	static int get_errtext (const byte text, map_str ma)
				1420	{
				1421	const byte t = text;
				1422	map_str *m = NULL;
				1423
				1424	map_str_create (&m);
				1425	if (m == NULL)
				1426	return 1;
				1427
				1428	if (get_identifier (&t, &m->key))
				1429	{
				1430	map_str_destroy (&m);
				1431	return 1;
				1432	}
				1433	eat_spaces (&t);
				1434
				1435	if (get_string (&t, &m->data))
				1436	{
				1437	map_str_destroy (&m);
				1438	return 1;
				1439	}
				1440	eat_spaces (&t);
				1441
				1442	*text = t;
				1443	*ma = m;
				1444	return 0;
				1445	}
				1446
				1447	/*
				1448	returns 0 on success,
				1449	returns 1 otherwise,
				1450	*/
				1451	static int get_error (const byte text, error er, map_str *maps)
				1452	{
				1453	const byte t = text;
				1454	byte *temp = NULL;
				1455
				1456	if (*t != '.')
				1457	return 0;
				1458
				1459	t++;
				1460	if (get_identifier (&t, &temp))
				1461	return 1;
				1462	eat_spaces (&t);
				1463
				1464	if (!str_equal ((byte *) "error", temp))
				1465	{
				1466	mem_free ((void **) &temp);
				1467	return 0;
				1468	}
				1469
				1470	mem_free ((void **) &temp);
				1471
				1472	error_create (er);
				1473	if (*er == NULL)
				1474	return 1;
				1475
				1476	if (*t == '\"')
				1477	{
				1478	if (get_string (&t, &(**er).m_text))
				1479	{
				1480	error_destroy (er);
				1481	return 1;
				1482	}
				1483	eat_spaces (&t);
				1484	}
				1485	else
				1486	{
				1487	if (get_identifier (&t, &temp))
				1488	{
				1489	error_destroy (er);
				1490	return 1;
				1491	}
				1492	eat_spaces (&t);
				1493
				1494	if (map_str_find (&maps, temp, &(**er).m_text))
				1495	{
				1496	mem_free ((void **) &temp);
				1497	error_destroy (er);
				1498	return 1;
				1499	}
				1500
				1501	mem_free ((void **) &temp);
				1502	}
				1503
				1504	/* try to extract "token" from "...$token$..." */
				1505	{
				1506	byte *processed = NULL;
				1507	unsigned int len = 0, i = 0;
				1508
				1509	if (string_grow (&processed, &len, '\0'))
				1510	{
				1511	error_destroy (er);
				1512	return 1;
				1513	}
				1514
				1515	while (i < str_length ((**er).m_text))
				1516	{
				1517	/* check if the dollar sign is repeated - if so skip it */
				1518	if ((er).m_text[i] == '$' && (er).m_text[i + 1] == '$')
				1519	{
				1520	if (string_grow (&processed, &len, '$'))
				1521	{
				1522	mem_free ((void **) &processed);
				1523	error_destroy (er);
				1524	return 1;
				1525	}
				1526
				1527	i += 2;
				1528	}
				1529	else if ((**er).m_text[i] != '$')
				1530	{
				1531	if (string_grow (&processed, &len, (**er).m_text[i]))
				1532	{
				1533	mem_free ((void **) &processed);
				1534	error_destroy (er);
				1535	return 1;
				1536	}
				1537
				1538	i++;
				1539	}
				1540	else
				1541	{
				1542	if (string_grow (&processed, &len, '$'))
				1543	{
				1544	mem_free ((void **) &processed);
				1545	error_destroy (er);
				1546	return 1;
				1547	}
				1548
				1549	{
				1550	/* length of token being extracted */
				1551	unsigned int tlen = 0;
				1552
				1553	if (string_grow (&(**er).m_token_name, &tlen, '\0'))
				1554	{
				1555	mem_free ((void **) &processed);
				1556	error_destroy (er);
				1557	return 1;
				1558	}
				1559
				1560	/* skip the dollar sign */
				1561	i++;
				1562
				1563	while ((**er).m_text[i] != '$')
				1564	{
				1565	if (string_grow (&(er).m_token_name, &tlen, (er).m_text[i]))
				1566	{
				1567	mem_free ((void **) &processed);
				1568	error_destroy (er);
				1569	return 1;
				1570	}
				1571
				1572	i++;
				1573	}
				1574
				1575	/* skip the dollar sign */
				1576	i++;
				1577	}
				1578	}
				1579	}
				1580
				1581	mem_free ((void ) &(er).m_text);
				1582	(**er).m_text = processed;
				1583	}
				1584
				1585	*text = t;
				1586	return 0;
				1587	}
				1588
				1589	/*
				1590	returns 0 on success,
				1591	returns 1 otherwise,
				1592	*/
				1593	static int get_emits (const byte text, emit em, map_byte *mapb)
				1594	{
				1595	const byte t = text;
				1596	byte *temp = NULL;
				1597	emit *e = NULL;
				1598	emit_dest dest;
				1599
				1600	if (*t != '.')
				1601	return 0;
				1602
				1603	t++;
				1604	if (get_identifier (&t, &temp))
				1605	return 1;
				1606	eat_spaces (&t);
				1607
				1608	/* .emit */
				1609	if (str_equal ((byte *) "emit", temp))
				1610	dest = ed_output;
				1611	/* .load */
				1612	else if (str_equal ((byte *) "load", temp))
				1613	dest = ed_regbyte;
				1614	else
				1615	{
				1616	mem_free ((void **) &temp);
				1617	return 0;
				1618	}
				1619
				1620	mem_free ((void **) &temp);
				1621
				1622	emit_create (&e);
				1623	if (e == NULL)
				1624	return 1;
				1625
				1626	e->m_emit_dest = dest;
				1627
				1628	if (dest == ed_regbyte)
				1629	{
				1630	if (get_identifier (&t, &e->m_regname))
				1631	{
				1632	emit_destroy (&e);
				1633	return 1;
				1634	}
				1635	eat_spaces (&t);
				1636	}
				1637
				1638	/* 0xNN */
				1639	if (*t == '0')
				1640	{
				1641	t += 2;
				1642	e->m_byte = (byte) hex_convert (&t);
				1643
				1644	e->m_emit_type = et_byte;
				1645	}
				1646	/* * */
				1647	else if (t == '')
				1648	{
				1649	t++;
				1650
				1651	e->m_emit_type = et_stream;
				1652	}
				1653	/* $ */
				1654	else if (*t == '$')
				1655	{
				1656	t++;
				1657
				1658	e->m_emit_type = et_position;
				1659	}
				1660	/* 'c' */
				1661	else if (*t == '\'')
				1662	{
				1663	if (get_string (&t, &temp))
				1664	{
				1665	emit_destroy (&e);
				1666	return 1;
				1667	}
				1668	e->m_byte = (byte) temp[0];
				1669
				1670	mem_free ((void **) &temp);
				1671
				1672	e->m_emit_type = et_byte;
				1673	}
				1674	else
				1675	{
				1676	if (get_identifier (&t, &temp))
				1677	{
				1678	emit_destroy (&e);
				1679	return 1;
				1680	}
				1681
				1682	if (map_byte_find (&mapb, temp, &e->m_byte))
				1683	{
				1684	mem_free ((void **) &temp);
				1685	emit_destroy (&e);
				1686	return 1;
				1687	}
				1688
				1689	mem_free ((void **) &temp);
				1690
				1691	e->m_emit_type = et_byte;
				1692	}
				1693
				1694	eat_spaces (&t);
				1695
				1696	if (get_emits (&t, &e->m_next, mapb))
				1697	{
				1698	emit_destroy (&e);
				1699	return 1;
				1700	}
				1701
				1702	*text = t;
				1703	*em = e;
				1704	return 0;
				1705	}
				1706
				1707	/*
				1708	returns 0 on success,
				1709	returns 1 otherwise,
				1710	*/
				1711	static int get_spec (const byte text, spec sp, map_str maps, map_byte mapb)
				1712	{
				1713	const byte t = text;
				1714	spec *s = NULL;
				1715
				1716	spec_create (&s);
				1717	if (s == NULL)
				1718	return 1;
				1719
				1720	/* first - read optional .if statement */
				1721	if (*t == '.')
				1722	{
				1723	const byte *u = t;
				1724	byte *keyword = NULL;
				1725
				1726	/* skip the dot */
				1727	u++;
				1728
				1729	if (get_identifier (&u, &keyword))
				1730	{
				1731	spec_destroy (&s);
				1732	return 1;
				1733	}
				1734
				1735	/* .if */
				1736	if (str_equal ((byte *) "if", keyword))
				1737	{
				1738	cond_create (&s->m_cond);
				1739	if (s->m_cond == NULL)
				1740	{
				1741	spec_destroy (&s);
				1742	return 1;
				1743	}
				1744
				1745	/* skip the left paren */
				1746	eat_spaces (&u);
				1747	u++;
				1748
				1749	/* get the left operand */
				1750	eat_spaces (&u);
				1751	if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
				1752	{
				1753	spec_destroy (&s);
				1754	return 1;
				1755	}
				1756	s->m_cond->m_operands[0].m_type = cot_regbyte;
				1757
				1758	/* get the operator (!= or ==) */
				1759	eat_spaces (&u);
				1760	if (*u == '!')
				1761	s->m_cond->m_type = ct_not_equal;
				1762	else
				1763	s->m_cond->m_type = ct_equal;
				1764	u += 2;
				1765
				1766	/* skip the 0x prefix */
				1767	eat_spaces (&u);
				1768	u += 2;
				1769
				1770	/* get the right operand */
				1771	s->m_cond->m_operands[1].m_byte = hex_convert (&u);
				1772	s->m_cond->m_operands[1].m_type = cot_byte;
				1773
				1774	/* skip the right paren */
				1775	eat_spaces (&u);
				1776	u++;
				1777
				1778	eat_spaces (&u);
				1779
				1780	t = u;
				1781	}
				1782
				1783	mem_free ((void **) &keyword);
				1784	}
				1785
				1786	if (*t == '\'')
				1787	{
				1788	byte *temp = NULL;
				1789
				1790	if (get_string (&t, &temp))
				1791	{
				1792	spec_destroy (&s);
				1793	return 1;
				1794	}
				1795	eat_spaces (&t);
				1796
				1797	if (*t == '-')
				1798	{
				1799	byte *temp2 = NULL;
				1800
				1801	/* skip the '-' character */
				1802	t++;
				1803	eat_spaces (&t);
				1804
				1805	if (get_string (&t, &temp2))
				1806	{
				1807	mem_free ((void **) &temp);
				1808	spec_destroy (&s);
				1809	return 1;
				1810	}
				1811	eat_spaces (&t);
				1812
				1813	s->m_spec_type = st_byte_range;
				1814	s->m_byte[0] = *temp;
				1815	s->m_byte[1] = *temp2;
				1816
				1817	mem_free ((void **) &temp2);
				1818	}
				1819	else
				1820	{
				1821	s->m_spec_type = st_byte;
				1822	s->m_byte = temp;
				1823	}
				1824
				1825	mem_free ((void **) &temp);
				1826	}
				1827	else if (*t == '"')
				1828	{
				1829	if (get_string (&t, &s->m_string))
				1830	{
				1831	spec_destroy (&s);
				1832	return 1;
				1833	}
				1834	eat_spaces (&t);
				1835
				1836	s->m_spec_type = st_string;
				1837	}
				1838	else if (*t == '.')
				1839	{
				1840	byte *keyword = NULL;
				1841
				1842	/* skip the dot */
				1843	t++;
				1844
				1845	if (get_identifier (&t, &keyword))
				1846	{
				1847	spec_destroy (&s);
				1848	return 1;
				1849	}
				1850	eat_spaces (&t);
				1851
				1852	/* .true */
				1853	if (str_equal ((byte *) "true", keyword))
				1854	{
				1855	s->m_spec_type = st_true;
				1856	}
				1857	/* .false */
				1858	else if (str_equal ((byte *) "false", keyword))
				1859	{
				1860	s->m_spec_type = st_false;
				1861	}
				1862	/* .debug */
				1863	else if (str_equal ((byte *) "debug", keyword))
				1864	{
				1865	s->m_spec_type = st_debug;
				1866	}
				1867	/* .loop */
				1868	else if (str_equal ((byte *) "loop", keyword))
				1869	{
				1870	if (get_identifier (&t, &s->m_string))
				1871	{
				1872	mem_free ((void **) &keyword);
				1873	spec_destroy (&s);
				1874	return 1;
				1875	}
				1876	eat_spaces (&t);
				1877
				1878	s->m_spec_type = st_identifier_loop;
				1879	}
				1880
				1881	mem_free ((void **) &keyword);
				1882	}
				1883	else
				1884	{
				1885	if (get_identifier (&t, &s->m_string))
				1886	{
				1887	spec_destroy (&s);
				1888	return 1;
				1889	}
				1890	eat_spaces (&t);
				1891
				1892	s->m_spec_type = st_identifier;
				1893	}
				1894
				1895	if (get_error (&t, &s->m_errtext, maps))
				1896	{
				1897	spec_destroy (&s);
				1898	return 1;
				1899	}
				1900
				1901	if (get_emits (&t, &s->m_emits, mapb))
				1902	{
				1903	spec_destroy (&s);
				1904	return 1;
				1905	}
				1906
				1907	*text = t;
				1908	*sp = s;
				1909	return 0;
				1910	}
				1911
				1912	/*
				1913	returns 0 on success,
				1914	returns 1 otherwise,
				1915	*/
				1916	static int get_rule (const byte text, rule ru, map_str maps, map_byte mapb)
				1917	{
				1918	const byte t = text;
				1919	rule *r = NULL;
				1920
				1921	rule_create (&r);
				1922	if (r == NULL)
				1923	return 1;
				1924
				1925	if (get_spec (&t, &r->m_specs, maps, mapb))
				1926	{
				1927	rule_destroy (&r);
				1928	return 1;
				1929	}
				1930
				1931	while (*t != ';')
				1932	{
				1933	byte *op = NULL;
				1934	spec *sp = NULL;
				1935
				1936	/* skip the dot that precedes "and" or "or" */
				1937	t++;
				1938
				1939	/* read "and" or "or" keyword */
				1940	if (get_identifier (&t, &op))
				1941	{
				1942	rule_destroy (&r);
				1943	return 1;
				1944	}
				1945	eat_spaces (&t);
				1946
				1947	if (r->m_oper == op_none)
				1948	{
				1949	/* .and */
				1950	if (str_equal ((byte *) "and", op))
				1951	r->m_oper = op_and;
				1952	/* .or */
				1953	else
				1954	r->m_oper = op_or;
				1955	}
				1956
				1957	mem_free ((void **) &op);
				1958
				1959	if (get_spec (&t, &sp, maps, mapb))
				1960	{
				1961	rule_destroy (&r);
				1962	return 1;
				1963	}
				1964
				1965	spec_append (&r->m_specs, &sp);
				1966	}
				1967
				1968	/* skip the semicolon */
				1969	t++;
				1970	eat_spaces (&t);
				1971
				1972	*text = t;
				1973	*ru = r;
				1974	return 0;
				1975	}
				1976
				1977	/*
				1978	returns 0 on success,
				1979	returns 1 otherwise,
				1980	*/
				1981	static int update_dependency (map_rule mapr, byte symbol, rule **ru)
				1982	{
				1983	if (map_rule_find (&mapr, symbol, ru))
				1984	return 1;
				1985
				1986	/* (*ru).m_referenced = 1; /
				1987
				1988	return 0;
				1989	}
				1990
				1991	/*
				1992	returns 0 on success,
				1993	returns 1 otherwise,
				1994	*/
				1995	static int update_dependencies (dict di, map_rule mapr, byte **syntax_symbol,
				1996	byte *string_symbol, map_byte regbytes)
				1997	{
				1998	rule *rulez = di->m_rulez;
				1999
				2000	/* update dependecies for the root and lexer symbols */
				2001	if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) \|\|
				2002	(string_symbol != NULL && update_dependency (mapr, string_symbol, &di->m_string)))
				2003	return 1;
				2004
				2005	mem_free ((void **) syntax_symbol);
				2006	mem_free ((void **) string_symbol);
				2007
				2008	/* update dependecies for the rest of the rules */
				2009	while (rulez)
				2010	{
				2011	spec *sp = rulez->m_specs;
				2012
				2013	/* iterate through all the specifiers */
				2014	while (sp)
				2015	{
				2016	/* update dependency for identifier */
				2017	if (sp->m_spec_type == st_identifier \|\| sp->m_spec_type == st_identifier_loop)
				2018	{
				2019	if (update_dependency (mapr, sp->m_string, &sp->m_rule))
				2020	return 1;
				2021
				2022	mem_free ((void **) &sp->m_string);
				2023	}
				2024
				2025	/* some errtexts reference to a rule */
				2026	if (sp->m_errtext && sp->m_errtext->m_token_name)
				2027	{
				2028	if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
				2029	return 1;
				2030
				2031	mem_free ((void **) &sp->m_errtext->m_token_name);
				2032	}
				2033
				2034	/* update dependency for condition */
				2035	if (sp->m_cond)
				2036	{
				2037	int i;
				2038	for (i = 0; i < 2; i++)
				2039	if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
				2040	{
				2041	sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
				2042	sp->m_cond->m_operands[i].m_regname);
				2043
				2044	if (sp->m_cond->m_operands[i].m_regbyte == NULL)
				2045	return 1;
				2046
				2047	mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
				2048	}
				2049	}
				2050
				2051	/* update dependency for all .load instructions */
				2052	if (sp->m_emits)
				2053	{
				2054	emit *em = sp->m_emits;
				2055	while (em != NULL)
				2056	{
				2057	if (em->m_emit_dest == ed_regbyte)
				2058	{
				2059	em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
				2060
				2061	if (em->m_regbyte == NULL)
				2062	return 1;
				2063
				2064	mem_free ((void **) &em->m_regname);
				2065	}
				2066
				2067	em = em->m_next;
				2068	}
				2069	}
				2070
				2071	sp = sp->m_next;
				2072	}
				2073
				2074	rulez = rulez->m_next;
				2075	}
				2076
				2077	/* check for unreferenced symbols */
				2078	/* de = di->m_defntns;
				2079	while (de)
				2080	{
				2081	if (!de->m_referenced)
				2082	{
				2083	map_def *ma = mapd;
				2084	while (ma)
				2085	{
				2086	if (ma->data == de)
				2087	{
				2088	assert (0);
				2089	break;
				2090	}
				2091	ma = ma->next;
				2092	}
				2093	}
				2094	de = de->m_next;
				2095	}
				2096	*/
				2097	return 0;
				2098	}
				2099
				2100	static int satisfies_condition (cond co, regbyte_ctx ctx)
				2101	{
				2102	byte values[2];
				2103	int i;
				2104
				2105	if (co == NULL)
				2106	return 1;
				2107
				2108	for (i = 0; i < 2; i++)
				2109	switch (co->m_operands[i].m_type)
				2110	{
				2111	case cot_byte:
				2112	values[i] = co->m_operands[i].m_byte;
				2113	break;
				2114	case cot_regbyte:
				2115	values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
				2116	break;
				2117	}
				2118
				2119	switch (co->m_type)
				2120	{
				2121	case ct_equal:
				2122	return values[0] == values[1];
				2123	case ct_not_equal:
				2124	return values[0] != values[1];
				2125	}
				2126
				2127	return 0;
				2128	}
				2129
				2130	static void free_regbyte_ctx_stack (regbyte_ctx top, regbyte_ctx limit)
				2131	{
				2132	while (top != limit)
				2133	{
				2134	regbyte_ctx *rbc = top->m_prev;
				2135	regbyte_ctx_destroy (&top);
				2136	top = rbc;
				2137	}
				2138	}
				2139
				2140	typedef enum match_result_
				2141	{
				2142	mr_not_matched, /* the examined string does not match */
				2143	mr_matched, /* the examined string matches */
				2144	mr_error_raised, /* mr_not_matched + error has been raised */
				2145	mr_dont_emit, /* used by identifier loops only */
				2146	mr_internal_error /* an internal error has occured such as out of memory */
				2147	} match_result;
				2148
				2149	/*
				2150	This function does the main job. It parses the text and generates output data.
				2151
				2152	XXX optimize it - the barray seems to be the bottleneck
				2153	*/
				2154	static match_result match (dict di, const byte text, unsigned int index, rule ru, barray **ba,
				2155	int filtering_string, regbyte_ctx **rbc)
				2156	{
				2157	unsigned int ind = *index;
				2158	match_result status = mr_not_matched;
				2159	spec *sp = ru->m_specs;
				2160	regbyte_ctx ctx = rbc;
				2161
				2162	/* for every specifier in the rule */
				2163	while (sp)
				2164	{
				2165	unsigned int i, len, save_ind = ind;
				2166	barray *array = NULL;
				2167
				2168	if (satisfies_condition (sp->m_cond, ctx))
				2169	{
				2170	switch (sp->m_spec_type)
				2171	{
				2172	case st_identifier:
				2173	barray_create (&array);
				2174	if (array == NULL)
				2175	{
				2176	free_regbyte_ctx_stack (ctx, *rbc);
				2177	return mr_internal_error;
				2178	}
				2179
				2180	status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
				2181	if (status == mr_internal_error)
				2182	{
				2183	free_regbyte_ctx_stack (ctx, *rbc);
				2184	barray_destroy (&array);
				2185	return mr_internal_error;
				2186	}
				2187	break;
				2188	case st_string:
				2189	len = str_length (sp->m_string);
				2190
				2191	/* prefilter the stream */
				2192	if (!filtering_string && di->m_string)
				2193	{
				2194	barray *ba;
				2195	unsigned int filter_index = 0;
				2196	match_result result;
				2197	regbyte_ctx *null_ctx = NULL;
				2198
				2199	barray_create (&ba);
				2200	if (ba == NULL)
				2201	{
				2202	free_regbyte_ctx_stack (ctx, *rbc);
				2203	return mr_internal_error;
				2204	}
				2205
				2206	result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
				2207
				2208	if (result == mr_internal_error)
				2209	{
				2210	free_regbyte_ctx_stack (ctx, *rbc);
				2211	barray_destroy (&ba);
				2212	return mr_internal_error;
				2213	}
				2214
				2215	if (result != mr_matched)
				2216	{
				2217	barray_destroy (&ba);
				2218	status = mr_not_matched;
				2219	break;
				2220	}
				2221
				2222	barray_destroy (&ba);
				2223
				2224	if (filter_index != len \|\| !str_equal_n (sp->m_string, text + ind, len))
				2225	{
				2226	status = mr_not_matched;
				2227	break;
				2228	}
				2229
				2230	status = mr_matched;
				2231	ind += len;
				2232	}
				2233	else
				2234	{
				2235	status = mr_matched;
				2236	for (i = 0; status == mr_matched && i < len; i++)
				2237	if (text[ind + i] != sp->m_string[i])
				2238	status = mr_not_matched;
				2239	if (status == mr_matched)
				2240	ind += len;
				2241	}
				2242	break;
				2243	case st_byte:
				2244	status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
				2245	if (status == mr_matched)
				2246	ind++;
				2247	break;
				2248	case st_byte_range:
				2249	status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
				2250	mr_matched : mr_not_matched;
				2251	if (status == mr_matched)
				2252	ind++;
				2253	break;
				2254	case st_true:
				2255	status = mr_matched;
				2256	break;
				2257	case st_false:
				2258	status = mr_not_matched;
				2259	break;
				2260	case st_debug:
				2261	status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
				2262	break;
				2263	case st_identifier_loop:
				2264	barray_create (&array);
				2265	if (array == NULL)
				2266	{
				2267	free_regbyte_ctx_stack (ctx, *rbc);
				2268	return mr_internal_error;
				2269	}
				2270
				2271	status = mr_dont_emit;
				2272	for (;;)
				2273	{
				2274	match_result result;
				2275
				2276	save_ind = ind;
				2277	result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
				2278
				2279	if (result == mr_error_raised)
				2280	{
				2281	status = result;
				2282	break;
				2283	}
				2284	else if (result == mr_matched)
				2285	{
				2286	if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) \|\|
				2287	barray_append (ba, &array))
				2288	{
				2289	free_regbyte_ctx_stack (ctx, *rbc);
				2290	barray_destroy (&array);
				2291	return mr_internal_error;
				2292	}
				2293	barray_destroy (&array);
				2294	barray_create (&array);
				2295	if (array == NULL)
				2296	{
				2297	free_regbyte_ctx_stack (ctx, *rbc);
				2298	return mr_internal_error;
				2299	}
				2300	}
				2301	else if (result == mr_internal_error)
				2302	{
				2303	free_regbyte_ctx_stack (ctx, *rbc);
				2304	barray_destroy (&array);
				2305	return mr_internal_error;
				2306	}
				2307	else
				2308	break;
				2309	}
				2310	break;
				2311	}
				2312	}
				2313	else
				2314	{
				2315	status = mr_not_matched;
				2316	}
				2317
				2318	if (status == mr_error_raised)
				2319	{
				2320	free_regbyte_ctx_stack (ctx, *rbc);
				2321	barray_destroy (&array);
				2322
				2323	return mr_error_raised;
				2324	}
				2325
				2326	if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
				2327	{
				2328	free_regbyte_ctx_stack (ctx, *rbc);
				2329	barray_destroy (&array);
				2330
				2331	if (sp->m_errtext)
				2332	{
				2333	set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
				2334	ind), ind);
				2335
				2336	return mr_error_raised;
				2337	}
				2338
				2339	return mr_not_matched;
				2340	}
				2341
				2342	if (status == mr_matched)
				2343	{
				2344	if (sp->m_emits)
				2345	if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
				2346	{
				2347	free_regbyte_ctx_stack (ctx, *rbc);
				2348	barray_destroy (&array);
				2349	return mr_internal_error;
				2350	}
				2351
				2352	if (array)
				2353	if (barray_append (ba, &array))
				2354	{
				2355	free_regbyte_ctx_stack (ctx, *rbc);
				2356	barray_destroy (&array);
				2357	return mr_internal_error;
				2358	}
				2359	}
				2360
				2361	barray_destroy (&array);
				2362
				2363	/* if the rule operator is a logical or, we pick up the first matching specifier */
				2364	if (ru->m_oper == op_or && (status == mr_matched \|\| status == mr_dont_emit))
				2365	{
				2366	*index = ind;
				2367	*rbc = ctx;
				2368	return mr_matched;
				2369	}
				2370
				2371	sp = sp->m_next;
				2372	}
				2373
				2374	/* everything went fine - all specifiers match up */
				2375	if (ru->m_oper == op_and && (status == mr_matched \|\| status == mr_dont_emit))
				2376	{
				2377	*index = ind;
				2378	*rbc = ctx;
				2379	return mr_matched;
				2380	}
				2381
				2382	free_regbyte_ctx_stack (ctx, *rbc);
				2383	return mr_not_matched;
				2384	}
				2385
				2386	static byte error_get_token (error er, dict di, const byte text, unsigned int ind)
				2387	{
				2388	byte *str = NULL;
				2389
				2390	if (er->m_token)
				2391	{
				2392	barray *ba;
				2393	unsigned int filter_index = 0;
				2394	regbyte_ctx *ctx = NULL;
				2395
				2396	barray_create (&ba);
				2397	if (ba != NULL)
				2398	{
				2399	if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
				2400	filter_index)
				2401	{
				2402	str = mem_alloc (filter_index + 1);
				2403	if (str != NULL)
				2404	{
				2405	str_copy_n (str, text + ind, filter_index);
				2406	str[filter_index] = '\0';
				2407	}
				2408	}
				2409	barray_destroy (&ba);
				2410	}
				2411	}
				2412
				2413	return str;
				2414	}
				2415
				2416	typedef struct grammar_load_state_
				2417	{
				2418	dict *di;
				2419	byte *syntax_symbol;
				2420	byte *string_symbol;
				2421	map_str *maps;
				2422	map_byte *mapb;
				2423	map_rule *mapr;
				2424	} grammar_load_state;
				2425
				2426	static void grammar_load_state_create (grammar_load_state **gr)
				2427	{
				2428	*gr = mem_alloc (sizeof (grammar_load_state));
				2429	if (*gr)
				2430	{
				2431	(**gr).di = NULL;
				2432	(**gr).syntax_symbol = NULL;
				2433	(**gr).string_symbol = NULL;
				2434	(**gr).maps = NULL;
				2435	(**gr).mapb = NULL;
				2436	(**gr).mapr = NULL;
				2437	}
				2438	}
				2439
				2440	static void grammar_load_state_destroy (grammar_load_state **gr)
				2441	{
				2442	if (*gr)
				2443	{
				2444	dict_destroy (&(**gr).di);
				2445	mem_free ((void ) &(gr).syntax_symbol);
				2446	mem_free ((void ) &(gr).string_symbol);
				2447	map_str_destroy (&(**gr).maps);
				2448	map_byte_destroy (&(**gr).mapb);
				2449	map_rule_destroy (&(**gr).mapr);
				2450	mem_free ((void **) gr);
				2451	}
				2452	}
				2453
				2454	/*
				2455	the API
				2456	*/
				2457
				2458	grammar grammar_load_from_text (const byte *text)
				2459	{
				2460	grammar_load_state *g = NULL;
				2461	grammar id = 0;
				2462
				2463	clear_last_error ();
				2464
				2465	grammar_load_state_create (&g);
				2466	if (g == NULL)
				2467	return 0;
				2468
				2469	dict_create (&g->di);
				2470	if (g->di == NULL)
				2471	{
				2472	grammar_load_state_destroy (&g);
				2473	return 0;
				2474	}
				2475
				2476	eat_spaces (&text);
				2477
				2478	/* skip ".syntax" keyword */
				2479	text += 7;
				2480	eat_spaces (&text);
				2481
				2482	/* retrieve root symbol */
				2483	if (get_identifier (&text, &g->syntax_symbol))
				2484	{
				2485	grammar_load_state_destroy (&g);
				2486	return 0;
				2487	}
				2488	eat_spaces (&text);
				2489
				2490	/* skip semicolon */
				2491	text++;
				2492	eat_spaces (&text);
				2493
				2494	while (*text)
				2495	{
				2496	byte *symbol = NULL;
				2497	int is_dot = *text == '.';
				2498
				2499	if (is_dot)
				2500	text++;
				2501
				2502	if (get_identifier (&text, &symbol))
				2503	{
				2504	grammar_load_state_destroy (&g);
				2505	return 0;
				2506	}
				2507	eat_spaces (&text);
				2508
				2509	/* .emtcode */
				2510	if (is_dot && str_equal (symbol, (byte *) "emtcode"))
				2511	{
				2512	map_byte *ma = NULL;
				2513
				2514	mem_free ((void **) &symbol);
				2515
				2516	if (get_emtcode (&text, &ma))
				2517	{
				2518	grammar_load_state_destroy (&g);
				2519	return 0;
				2520	}
				2521
				2522	map_byte_append (&g->mapb, &ma);
				2523	}
				2524	/* .regbyte */
				2525	else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
				2526	{
				2527	map_byte *ma = NULL;
				2528
				2529	mem_free ((void **) &symbol);
				2530
				2531	if (get_regbyte (&text, &ma))
				2532	{
				2533	grammar_load_state_destroy (&g);
				2534	return 0;
				2535	}
				2536
				2537	map_byte_append (&g->di->m_regbytes, &ma);
				2538	}
				2539	/* .errtext */
				2540	else if (is_dot && str_equal (symbol, (byte *) "errtext"))
				2541	{
				2542	map_str *ma = NULL;
				2543
				2544	mem_free ((void **) &symbol);
				2545
				2546	if (get_errtext (&text, &ma))
				2547	{
				2548	grammar_load_state_destroy (&g);
				2549	return 0;
				2550	}
				2551
				2552	map_str_append (&g->maps, &ma);
				2553	}
				2554	/* .string */
				2555	else if (is_dot && str_equal (symbol, (byte *) "string"))
				2556	{
				2557	mem_free ((void **) &symbol);
				2558
				2559	if (g->di->m_string != NULL)
				2560	{
				2561	grammar_load_state_destroy (&g);
				2562	return 0;
				2563	}
				2564
				2565	if (get_identifier (&text, &g->string_symbol))
				2566	{
				2567	grammar_load_state_destroy (&g);
				2568	return 0;
				2569	}
				2570
				2571	/* skip semicolon */
				2572	eat_spaces (&text);
				2573	text++;
				2574	eat_spaces (&text);
				2575	}
				2576	else
				2577	{
				2578	rule *ru = NULL;
				2579	map_rule *ma = NULL;
				2580
				2581	if (get_rule (&text, &ru, g->maps, g->mapb))
				2582	{
				2583	grammar_load_state_destroy (&g);
				2584	return 0;
				2585	}
				2586
				2587	rule_append (&g->di->m_rulez, &ru);
				2588
				2589	/* if a rule consist of only one specifier, give it an ".and" operator */
				2590	if (ru->m_oper == op_none)
				2591	ru->m_oper = op_and;
				2592
				2593	map_rule_create (&ma);
				2594	if (ma == NULL)
				2595	{
				2596	grammar_load_state_destroy (&g);
				2597	return 0;
				2598	}
				2599
				2600	ma->key = symbol;
				2601	ma->data = ru;
				2602	map_rule_append (&g->mapr, &ma);
				2603	}
				2604	}
				2605
				2606	if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
				2607	g->di->m_regbytes))
				2608	{
				2609	grammar_load_state_destroy (&g);
				2610	return 0;
				2611	}
				2612
				2613	dict_append (&g_dicts, &g->di);
				2614	id = g->di->m_id;
				2615	g->di = NULL;
				2616
				2617	grammar_load_state_destroy (&g);
				2618
				2619	return id;
				2620	}
				2621
				2622	int grammar_set_reg8 (grammar id, const byte *name, byte value)
				2623	{
				2624	dict *di = NULL;
				2625	map_byte *reg = NULL;
				2626
				2627	clear_last_error ();
				2628
				2629	dict_find (&g_dicts, id, &di);
				2630	if (di == NULL)
				2631	{
				2632	set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
				2633	return 0;
				2634	}
				2635
				2636	reg = map_byte_locate (&di->m_regbytes, name);
				2637	if (reg == NULL)
				2638	{
				2639	set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
				2640	return 0;
				2641	}
				2642
				2643	reg->data = value;
				2644	return 1;
				2645	}
				2646
				2647	int grammar_check (grammar id, const byte text, byte prod, unsigned int size)
				2648	{
				2649	dict *di = NULL;
				2650	barray *ba = NULL;
				2651	unsigned int index = 0;
				2652	regbyte_ctx *rbc = NULL;
				2653
				2654	clear_last_error ();
				2655
				2656	dict_find (&g_dicts, id, &di);
				2657	if (di == NULL)
				2658	{
				2659	set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
				2660	return 0;
				2661	}
				2662
				2663	barray_create (&ba);
				2664	if (ba == NULL)
				2665	return 0;
				2666
				2667	*prod = NULL;
				2668	*size = 0;
				2669
				2670	if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
				2671	{
				2672	barray_destroy (&ba);
				2673	free_regbyte_ctx_stack (rbc, NULL);
				2674	return 0;
				2675	}
				2676
				2677	free_regbyte_ctx_stack (rbc, NULL);
				2678
				2679	prod = mem_alloc (ba->len sizeof (byte));
				2680	if (*prod == NULL)
				2681	{
				2682	barray_destroy (&ba);
				2683	return 0;
				2684	}
				2685
				2686	mem_copy (prod, ba->data, ba->len sizeof (byte));
				2687	*size = ba->len;
				2688	barray_destroy (&ba);
				2689
				2690	return 1;
				2691	}
				2692
				2693	int grammar_destroy (grammar id)
				2694	{
				2695	dict **di = &g_dicts;
				2696
				2697	clear_last_error ();
				2698
				2699	while (*di != NULL)
				2700	{
				2701	if ((**di).m_id == id)
				2702	{
				2703	dict tmp = di;
				2704	di = (*di).m_next;
				2705	dict_destroy (&tmp);
				2706	return 1;
				2707	}
				2708
				2709	di = &(**di).m_next;
				2710	}
				2711
				2712	set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
				2713	return 0;
				2714	}
				2715
				2716	void grammar_get_last_error (byte text, unsigned int size, int pos)
				2717	{
				2718	unsigned int len = 0, dots_made = 0;
				2719	const byte *p = error_message;
				2720
				2721	*text = '\0';
				2722
				2723	#define APPEND_CHARACTER(x) if (dots_made == 0) {\
				2724	if (len < size - 1) {\
				2725	text[len++] = (x); text[len] = '\0';\
				2726	} else {\
				2727	int i;\
				2728	for (i = 0; i < 3; i++)\
				2729	if (--len >= 0)\
				2730	text[len] = '.';\
				2731	dots_made = 1;\
				2732	}\
				2733	}
				2734
				2735	if (p)
				2736	while (*p)
				2737	if (*p == '$')
				2738	{
				2739	const byte *r = error_param;
				2740
				2741	while (*r)
				2742	{
				2743	APPEND_CHARACTER(*r)
				2744	r++;
				2745	}
				2746
				2747	p++;
				2748	}
				2749	else
				2750	{
				2751	APPEND_CHARACTER(*p)
				2752	p++;
				2753	}
				2754
				2755	*pos = error_position;
				2756
				2757	#undef APPEND_CHARACTER
				2758
				2759	}
				2760