Blame - src/mesa/shader/grammar.c - platform/external/mesa3d

blob: 61eee8ae41c22e9049db4c187af5b50cfd85aff3 [file] [log] [blame]

Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1	/*
				2	* Mesa 3-D graphics library
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3	* Version: 6.2
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	4	*
				5	* Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
				6	*
				7	* Permission is hereby granted, free of charge, to any person obtaining a
				8	* copy of this software and associated documentation files (the "Software"),
				9	* to deal in the Software without restriction, including without limitation
				10	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				11	* and/or sell copies of the Software, and to permit persons to whom the
				12	* Software is furnished to do so, subject to the following conditions:
				13	*
				14	* The above copyright notice and this permission notice shall be included
				15	* in all copies or substantial portions of the Software.
				16	*
				17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
				18	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				20	* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
				21	* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
				22	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
				23	*/
				24
				25	/**
				26	* \file grammar.c
				27	* syntax parsing engine
				28	* \author Michal Krol
				29	*/
				30
				31	#ifndef GRAMMAR_PORT_BUILD
				32	#error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
				33	#endif
				34
				35	/*
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	36	$Id: grammar.c,v 1.9 2004/10/20 14:54:17 michal Exp $
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	37	*/
				38
				39	/*
				40	INTRODUCTION
				41	------------
				42
				43	The task is to check the syntax of an input string. Input string is a stream of ASCII
				44	characters terminated with a null-character ('\0'). Checking it using C language is
				45	difficult and hard to implement without bugs. It is hard to maintain and make changes when
				46	the syntax changes.
				47
				48	This is because of a high redundancy of the C code. Large blocks of code are duplicated with
				49	only small changes. Even use of macros does not solve the problem because macros cannot
				50	erase the complexity of the problem.
				51
				52	The resolution is to create a new language that will be highly oriented to our task. Once
				53	we describe a particular syntax, we are done. We can then focus on the code that implements
				54	the language. Its size and complexity are relatively small compared to the code that directly
				55	checks the syntax.
				56
				57	First, we must implement our new language. Here, the language is implemented in C, but it
				58	could also be implemented in any other language. The code is listed below. We must take
				59	a good care that it is bug free. This is simple because the code is simple and clean.
				60
				61	Next, we must describe the syntax of our new language in itself. Once created and checked
				62	manually that it is correct, we can use it to check other scripts.
				63
				64	Note that our new language loading code does not have to check the syntax. It is because we
				65	assume that the script describing itself is correct, and other scripts can be syntactically
				66	checked by the former script. The loading code must only do semantic checking which leads us to
				67	simple resolving references.
				68
				69	THE LANGUAGE
				70	------------
				71
				72	Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
				73	sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
				74	which is an identifier, and its definition. A definition is in turn a sequence of specifiers
				75	connected with ".and" or ".or" operator. These operators cannot be mixed together in a one
				76	definition. Specifier can be a symbol, string, character, character range or a special
				77	keyword ".true" or ".false".
				78
				79	On the very beginning of the script there is a declaration of a root symbol and is in the form:
				80	.syntax <root_symbol>;
				81	The <root_symbol> must be one of the symbols in declaration sequence. The syntax is correct if
				82	the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
				83	the symbol evaluates to true. Definition evaluation depends on the operator used to connect
				84	specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
				85	only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
				86	true if any of the specifiers evaluates to true. If definition contains only one specifier,
				87	it is evaluated as if it was connected with ".true" keyword by ".and" operator.
				88
				89	If specifier is a ".true" keyword, it always evaluates to true.
				90
				91	If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
				92	when it does not evaluate to true.
				93
				94	Character range specifier is in the form:
				95	'<first_character>' - '<second_character>'
				96	If specifier is a character range, it evaluates to true if character in the stream is greater
				97	or equal to <first_character> and less or equal to <second_character>. In that situation
				98	the stream pointer is advanced to point to next character in the stream. All C-style escape
				99	sequences are supported although trigraph sequences are not. The comparisons are performed
				100	on 8-bit unsigned integers.
				101
				102	Character specifier is in the form:
				103	'<single_character>'
				104	It evaluates to true if the following character range specifier evaluates to true:
				105	'<single_character>' - '<single_character>'
				106
				107	String specifier is in the form:
				108	"<string>"
				109	Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
				110	<string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
				111	the following character specifier evaluates to true:
				112	'<string>[i]'
				113	If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
				114
				115	Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
				116	.loop <symbol> (1)
				117	where <symbol> is defined as follows:
				118	<symbol> <definition>; (2)
				119	Construction (1) is replaced by the following code:
				120	<symbol$1>
				121	and declaration (2) is replaced by the following:
				122	<symbol$1> <symbol$2> .or .true;
				123	<symbol$2> <symbol> .and <symbol$1>;
				124	<symbol> <definition>;
				125
				126	Synek also supports a register mechanism. The user can, in the SYN file, declare a number of
				127	registers that can be accessed in the syn body. Each reg has its name and a default value.
				128	The register is one byte wide. The C code can change the default value by calling
				129	grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
				130	a sequence of specifiers joined with .and or .or operator. And now each specifier can be
				131	prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
				132	where <operator> can be == or !=. If the condition evaluates to false, the specifier
				133	evaluates to .false. Otherwise it evaluates to the specifier.
				134
				135	ESCAPE SEQUENCES
				136	----------------
				137
				138	Synek supports all escape sequences in character specifiers. The mapping table is listed below.
				139	All occurrences of the characters in the first column are replaced with the corresponding
				140	character in the second column.
				141
				142	Escape sequence Represents
				143	------------------------------------------------------------------------------------------------
				144	\a Bell (alert)
				145	\b Backspace
				146	\f Formfeed
				147	\n New line
				148	\r Carriage return
				149	\t Horizontal tab
				150	\v Vertical tab
				151	\' Single quotation mark
				152	\" Double quotation mark
				153	\\ Backslash
				154	\? Literal question mark
				155	\ooo ASCII character in octal notation
				156	\xhhh ASCII character in hexadecimal notation
				157	------------------------------------------------------------------------------------------------
				158
				159	RAISING ERRORS
				160	--------------
				161
				162	Any specifier can be followed by a special construction that is executed when the specifier
				163	evaluates to false. The construction is in the form:
				164	.error <ERROR_TEXT>
				165	<ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
				166	in the form:
				167	.errtext <ERROR_TEXT> "<error_desc>"
				168	When specifier evaluates to false and this construction is present, parsing is stopped
				169	immediately and <error_desc> is returned as a result of parsing. The error position is also
				170	returned and it is meant as an offset from the beginning of the stream to the character that
				171	was valid so far. Example:
				172
				173	(** syntax script **)
				174
				175	.syntax program;
				176	.errtext MISSING_SEMICOLON "missing ';'"
				177	program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
				178	.loop space .and '\0';
				179	declaration "declare" .and .loop space .and identifier;
				180	space ' ';
				181
				182	(** sample code **)
				183
				184	declare foo ,
				185
				186	In the example above checking the sample code will result in error message "missing ';'" and
				187	error position 12. The sample code is not correct. Note the presence of '\0' specifier to
				188	assure that there is no code after semicolon - only spaces.
				189	<error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
				190	the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
				191	the identifier name. The starting position is the error position. The length of the resulting
				192	string is the position after invoking the symbol.
				193
				194	PRODUCTION
				195	----------
				196
				197	Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
				198	that evaluate to true. That is, every specifier and optional error construction can be followed
				199	by a number of emit constructions that are in the form:
				200	.emit <parameter>
				201	<parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
				202	0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
				203	in the form:
				204	.emtcode <identifier> <hex_number>
				205
				206	When given specifier evaluates to true, all emits associated with the specifier are output
				207	in order they were declared. A star means that last-read character should be output instead
				208	of constant value. Example:
				209
				210	(** syntax script **)
				211
				212	.syntax foobar;
				213	.emtcode WORD_FOO 0x01
				214	.emtcode WORD_BAR 0x02
				215	foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
				216	FOO "foo" .and SPACE;
				217	BAR "bar" .and SPACE;
				218	SPACE ' ' .or '\0';
				219
				220	(** sample text 1 **)
				221
				222	foo
				223
				224	(** sample text 2 **)
				225
				226	foobar
				227
				228	For both samples the result will be one-element array. For first sample text it will be
				229	value 1, for second - 0. Note that every text will be accepted because of presence of
				230	.true as an alternative.
				231
				232	Another example:
				233
				234	(** syntax script **)
				235
				236	.syntax declaration;
				237	.emtcode VARIABLE 0x01
				238	declaration "declare" .and .loop space .and
				239	identifier .emit VARIABLE .and (1)
				240	.true .emit 0x00 .and (2)
				241	.loop space .and ';';
				242	space ' ' .or '\t';
				243	identifier .loop id_char .emit *; (3)
				244	id_char 'a'-'z' .or 'A'-'Z' .or '_';
				245
				246	(** sample code **)
				247
				248	declare fubar;
				249
				250	In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
				251	true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
				252	to terminate the string with null to signal when the string ends. Specifier (3) outputs
				253	all characters that make declared identifier. The result of sample code will be the
				254	following array:
				255	{ 1, 'f', 'u', 'b', 'a', 'r', 0 }
				256
				257	If .emit is followed by dollar $, it means that current position should be output. Current
				258	position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
				259	first character consumed by the specifier associated with the .emit instruction. Current
				260	position is stored in the output buffer in Little-Endian convention (the lowest byte comes
				261	first).
				262	*/
				263
				264	static void mem_free (void **);
				265
				266	/*
				267	internal error messages
				268	*/
				269	static const byte OUT_OF_MEMORY = (byte ) "internal error 1001: out of physical memory";
				270	static const byte UNRESOLVED_REFERENCE = (byte ) "internal error 1002: unresolved reference '$'";
				271	static const byte INVALID_GRAMMAR_ID = (byte ) "internal error 1003: invalid grammar object";
				272	static const byte INVALID_REGISTER_NAME = (byte ) "internal error 1004: invalid register name: '$'";
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	273	static const byte DUPLICATE_IDENTIFIER = (byte ) "internal error 1005: identifier '$' already defined";
				274	static const byte UNREFERENCED_IDENTIFIER =(byte ) "internal error 1006: unreferenced identifier '$'";
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	275
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	276	static const byte error_message = NULL; / points to one of the error messages above */
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	277	static byte error_param = NULL; / this is inserted into error_message in place of $ */
				278	static int error_position = -1;
				279
				280	static byte unknown = (byte ) "???";
				281
				282	static void clear_last_error ()
				283	{
				284	/* reset error message */
				285	error_message = NULL;
				286
				287	/* free error parameter - if error_param is a "???" don't free it - it's static */
				288	if (error_param != unknown)
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	289	mem_free ((void *) (void ) &error_param);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	290	else
				291	error_param = NULL;
				292
				293	/* reset error position */
				294	error_position = -1;
				295	}
				296
				297	static void set_last_error (const byte msg, byte param, int pos)
				298	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	299	/* error message can be set only once */
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	300	if (error_message != NULL)
				301	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	302	mem_free ((void *) (void ) &param);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	303	return;
				304	}
				305
				306	error_message = msg;
				307
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	308	/* if param is NULL, set error_param to unknown ("???") */
				309	/* note: do not try to strdup the "???" - it may be that we are here because of */
				310	/* out of memory error so strdup can fail */
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	311	if (param != NULL)
				312	error_param = param;
				313	else
				314	error_param = unknown;
				315
				316	error_position = pos;
				317	}
				318
				319	/*
				320	memory management routines
				321	*/
				322	static void *mem_alloc (size_t size)
				323	{
				324	void *ptr = grammar_alloc_malloc (size);
				325	if (ptr == NULL)
				326	set_last_error (OUT_OF_MEMORY, NULL, -1);
				327	return ptr;
				328	}
				329
				330	static void mem_copy (void dst, const void *src, size_t size)
				331	{
				332	return grammar_memory_copy (dst, src, size);
				333	}
				334
				335	static void mem_free (void **ptr)
				336	{
				337	grammar_alloc_free (*ptr);
				338	*ptr = NULL;
				339	}
				340
				341	static void mem_realloc (void ptr, size_t old_size, size_t new_size)
				342	{
				343	void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
				344	if (ptr2 == NULL)
				345	set_last_error (OUT_OF_MEMORY, NULL, -1);
				346	return ptr2;
				347	}
				348
				349	static byte str_copy_n (byte dst, const byte *src, size_t max_len)
				350	{
				351	return grammar_string_copy_n (dst, src, max_len);
				352	}
				353
				354	static byte str_duplicate (const byte str)
				355	{
				356	byte *new_str = grammar_string_duplicate (str);
				357	if (new_str == NULL)
				358	set_last_error (OUT_OF_MEMORY, NULL, -1);
				359	return new_str;
				360	}
				361
				362	static int str_equal (const byte str1, const byte str2)
				363	{
				364	return grammar_string_compare (str1, str2) == 0;
				365	}
				366
				367	static int str_equal_n (const byte str1, const byte str2, unsigned int n)
				368	{
				369	return grammar_string_compare_n (str1, str2, n) == 0;
				370	}
				371
				372	static unsigned int str_length (const byte *str)
				373	{
				374	return grammar_string_length (str);
				375	}
				376
				377	/*
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	378	useful macros
				379	*/
				380	#define GRAMMAR_IMPLEMENT_LIST_APPEND(_Ty)\
				381	static void _Ty##_append (_Ty *x, _Ty nx) {\
				382	while (x) x = &(*x).next;\
				383	*x = nx;\
				384	}
				385
				386	/*
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	387	string to byte map typedef
				388	*/
				389	typedef struct map_byte_
				390	{
				391	byte *key;
				392	byte data;
				393	struct map_byte_ *next;
				394	} map_byte;
				395
				396	static void map_byte_create (map_byte **ma)
				397	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	398	ma = (map_byte ) mem_alloc (sizeof (map_byte));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	399	if (*ma)
				400	{
				401	(**ma).key = NULL;
				402	(**ma).data = '\0';
				403	(**ma).next = NULL;
				404	}
				405	}
				406
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	407	static void map_byte_destroy (map_byte **ma)
				408	{
				409	if (*ma)
				410	{
				411	map_byte_destroy (&(**ma).next);
				412	mem_free ((void ) &(ma).key);
				413	mem_free ((void **) ma);
				414	}
				415	}
				416
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	417	GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	418
				419	/*
				420	searches the map for the specified key,
				421	returns pointer to the element with the specified key if it exists
				422	returns NULL otherwise
				423	*/
Brian Paul	788461f	2004-03-29 14:53:49 +0000	[diff] [blame]	424	static map_byte map_byte_locate (map_byte ma, const byte key)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	425	{
				426	while (*ma)
				427	{
				428	if (str_equal ((**ma).key, key))
				429	return *ma;
				430
				431	ma = &(**ma).next;
				432	}
				433
				434	set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
				435	return NULL;
				436	}
				437
				438	/*
				439	searches the map for specified key,
				440	if the key is matched, *data is filled with data associated with the key,
				441	returns 0 if the key is matched,
				442	returns 1 otherwise
				443	*/
				444	static int map_byte_find (map_byte *ma, const byte key, byte *data)
				445	{
				446	map_byte *found = map_byte_locate (ma, key);
				447	if (found != NULL)
				448	{
				449	*data = found->data;
				450
				451	return 0;
				452	}
				453
				454	return 1;
				455	}
				456
				457	/*
				458	regbyte context typedef
				459
				460	Each regbyte consists of its name and a default value. These are static and created at
				461	grammar script compile-time, for example the following line:
				462	.regbyte vertex_blend 0x00
				463	adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
				464	When the script is executed, this regbyte can be accessed by name for read and write. When a
				465	particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
				466	stack. The new entry contains information abot which regbyte it references and its new value.
				467	When a given regbyte is accessed for read, the stack is searched top-down to find an
				468	entry that references the regbyte. The first matching entry is used to return the current
				469	value it holds. If no entry is found, the default value is returned.
				470	*/
				471	typedef struct regbyte_ctx_
				472	{
				473	map_byte *m_regbyte;
				474	byte m_current_value;
				475	struct regbyte_ctx_ *m_prev;
				476	} regbyte_ctx;
				477
				478	static void regbyte_ctx_create (regbyte_ctx **re)
				479	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	480	re = (regbyte_ctx ) mem_alloc (sizeof (regbyte_ctx));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	481	if (*re)
				482	{
				483	(**re).m_regbyte = NULL;
				484	(**re).m_prev = NULL;
				485	}
				486	}
				487
				488	static void regbyte_ctx_destroy (regbyte_ctx **re)
				489	{
				490	if (*re)
				491	{
				492	mem_free ((void **) re);
				493	}
				494	}
				495
				496	static byte regbyte_ctx_extract (regbyte_ctx *re, map_byte reg)
				497	{
				498	/* first lookup in the register stack */
				499	while (*re != NULL)
				500	{
				501	if ((**re).m_regbyte == reg)
				502	return (**re).m_current_value;
				503
				504	re = &(**re).m_prev;
				505	}
				506
				507	/* if not found - return the default value */
				508	return reg->data;
				509	}
				510
				511	/*
				512	emit type typedef
				513	*/
				514	typedef enum emit_type_
				515	{
				516	et_byte, /* explicit number */
				517	et_stream, /* eaten character */
				518	et_position /* current position */
				519	} emit_type;
				520
				521	/*
				522	emit destination typedef
				523	*/
				524	typedef enum emit_dest_
				525	{
				526	ed_output, /* write to the output buffer */
				527	ed_regbyte /* write a particular regbyte */
				528	} emit_dest;
				529
				530	/*
				531	emit typedef
				532	*/
				533	typedef struct emit_
				534	{
				535	emit_dest m_emit_dest;
				536	emit_type m_emit_type; /* ed_output */
				537	byte m_byte; /* et_byte */
				538	map_byte m_regbyte; / ed_regbyte */
				539	byte m_regname; / ed_regbyte - temporary */
				540	struct emit_ *m_next;
				541	} emit;
				542
				543	static void emit_create (emit **em)
				544	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	545	em = (emit ) mem_alloc (sizeof (emit));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	546	if (*em)
				547	{
				548	(**em).m_emit_dest = ed_output;
				549	(**em).m_emit_type = et_byte;
				550	(**em).m_byte = '\0';
				551	(**em).m_regbyte = NULL;
				552	(**em).m_regname = NULL;
				553	(**em).m_next = NULL;
				554	}
				555	}
				556
				557	static void emit_destroy (emit **em)
				558	{
				559	if (*em)
				560	{
				561	emit_destroy (&(**em).m_next);
				562	mem_free ((void ) &(em).m_regname);
				563	mem_free ((void **) em);
				564	}
				565	}
				566
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	567	static unsigned int emit_size (emit *_E)
				568	{
				569	unsigned int _N = 0;
				570
				571	while (_E != NULL)
				572	{
				573	if (_E->m_emit_dest == ed_output)
				574	{
				575	if (_E->m_emit_type == et_position)
				576	_N += 4; /* position is a 32-bit unsigned integer */
				577	else
				578	_N++;
				579	}
				580	_E = _E->m_next;
				581	}
				582
				583	return _N;
				584	}
				585
				586	static int emit_push (emit _E, byte _P, byte _C, unsigned int _Pos, regbyte_ctx **_Ctx)
				587	{
				588	while (_E != NULL)
				589	{
				590	if (_E->m_emit_dest == ed_output)
				591	{
				592	if (_E->m_emit_type == et_byte)
				593	*_P++ = _E->m_byte;
				594	else if (_E->m_emit_type == et_stream)
				595	*_P++ = _C;
				596	else /* _Em->type == et_position */
				597	{
				598	*_P++ = (byte) (_Pos);
				599	*_P++ = (byte) (_Pos >> 8);
				600	*_P++ = (byte) (_Pos >> 16);
				601	*_P++ = (byte) (_Pos >> 24);
				602	}
				603	}
				604	else
				605	{
				606	regbyte_ctx *new_rbc;
				607	regbyte_ctx_create (&new_rbc);
				608	if (new_rbc == NULL)
				609	return 1;
				610
				611	new_rbc->m_prev = *_Ctx;
				612	new_rbc->m_regbyte = _E->m_regbyte;
				613	*_Ctx = new_rbc;
				614
				615	if (_E->m_emit_type == et_byte)
				616	new_rbc->m_current_value = _E->m_byte;
				617	else if (_E->m_emit_type == et_stream)
				618	new_rbc->m_current_value = _C;
				619	}
				620
				621	_E = _E->m_next;
				622	}
				623
				624	return 0;
				625	}
				626
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	627	/*
				628	error typedef
				629	*/
				630	typedef struct error_
				631	{
				632	byte *m_text;
				633	byte *m_token_name;
				634	struct rule_ *m_token;
				635	} error;
				636
				637	static void error_create (error **er)
				638	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	639	er = (error ) mem_alloc (sizeof (error));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	640	if (*er)
				641	{
				642	(**er).m_text = NULL;
				643	(**er).m_token_name = NULL;
				644	(**er).m_token = NULL;
				645	}
				646	}
				647
				648	static void error_destroy (error **er)
				649	{
				650	if (*er)
				651	{
				652	mem_free ((void ) &(er).m_text);
				653	mem_free ((void ) &(er).m_token_name);
				654	mem_free ((void **) er);
				655	}
				656	}
				657
				658	struct dict_;
				659	static byte error_get_token (error , struct dict_ , const byte , unsigned int);
				660
				661	/*
				662	condition operand type typedef
				663	*/
				664	typedef enum cond_oper_type_
				665	{
				666	cot_byte, /* constant 8-bit unsigned integer */
				667	cot_regbyte /* pointer to byte register containing the current value */
				668	} cond_oper_type;
				669
				670	/*
				671	condition operand typedef
				672	*/
				673	typedef struct cond_oper_
				674	{
				675	cond_oper_type m_type;
				676	byte m_byte; /* cot_byte */
				677	map_byte m_regbyte; / cot_regbyte */
				678	byte m_regname; / cot_regbyte - temporary */
				679	} cond_oper;
				680
				681	/*
				682	condition type typedef
				683	*/
				684	typedef enum cond_type_
				685	{
				686	ct_equal,
				687	ct_not_equal
				688	} cond_type;
				689
				690	/*
				691	condition typedef
				692	*/
				693	typedef struct cond_
				694	{
				695	cond_type m_type;
				696	cond_oper m_operands[2];
				697	} cond;
				698
				699	static void cond_create (cond **co)
				700	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	701	co = (cond ) mem_alloc (sizeof (cond));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	702	if (*co)
				703	{
				704	(**co).m_operands[0].m_regname = NULL;
				705	(**co).m_operands[1].m_regname = NULL;
				706	}
				707	}
				708
				709	static void cond_destroy (cond **co)
				710	{
				711	if (*co)
				712	{
				713	mem_free ((void ) &(co).m_operands[0].m_regname);
				714	mem_free ((void ) &(co).m_operands[1].m_regname);
				715	mem_free ((void **) co);
				716	}
				717	}
				718
				719	/*
				720	specifier type typedef
				721	*/
				722	typedef enum spec_type_
				723	{
				724	st_false,
				725	st_true,
				726	st_byte,
				727	st_byte_range,
				728	st_string,
				729	st_identifier,
				730	st_identifier_loop,
				731	st_debug
				732	} spec_type;
				733
				734	/*
				735	specifier typedef
				736	*/
				737	typedef struct spec_
				738	{
				739	spec_type m_spec_type;
				740	byte m_byte[2]; /* st_byte, st_byte_range */
				741	byte m_string; / st_string */
				742	struct rule_ m_rule; / st_identifier, st_identifier_loop */
				743	emit *m_emits;
				744	error *m_errtext;
				745	cond *m_cond;
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	746	struct spec_ *next;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	747	} spec;
				748
				749	static void spec_create (spec **sp)
				750	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	751	sp = (spec ) mem_alloc (sizeof (spec));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	752	if (*sp)
				753	{
				754	(**sp).m_spec_type = st_false;
				755	(**sp).m_byte[0] = '\0';
				756	(**sp).m_byte[1] = '\0';
				757	(**sp).m_string = NULL;
				758	(**sp).m_rule = NULL;
				759	(**sp).m_emits = NULL;
				760	(**sp).m_errtext = NULL;
				761	(**sp).m_cond = NULL;
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	762	(**sp).next = NULL;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	763	}
				764	}
				765
				766	static void spec_destroy (spec **sp)
				767	{
				768	if (*sp)
				769	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	770	spec_destroy (&(**sp).next);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	771	emit_destroy (&(**sp).m_emits);
				772	error_destroy (&(**sp).m_errtext);
				773	mem_free ((void ) &(sp).m_string);
				774	cond_destroy (&(**sp).m_cond);
				775	mem_free ((void **) sp);
				776	}
				777	}
				778
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	779	GRAMMAR_IMPLEMENT_LIST_APPEND(spec)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	780
				781	/*
				782	operator typedef
				783	*/
				784	typedef enum oper_
				785	{
				786	op_none,
				787	op_and,
				788	op_or
				789	} oper;
				790
				791	/*
				792	rule typedef
				793	*/
				794	typedef struct rule_
				795	{
				796	oper m_oper;
				797	spec *m_specs;
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	798	struct rule_ *next;
				799	int m_referenced;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	800	} rule;
				801
				802	static void rule_create (rule **ru)
				803	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	804	ru = (rule ) mem_alloc (sizeof (rule));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	805	if (*ru)
				806	{
				807	(**ru).m_oper = op_none;
				808	(**ru).m_specs = NULL;
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	809	(**ru).next = NULL;
				810	(**ru).m_referenced = 0;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	811	}
				812	}
				813
				814	static void rule_destroy (rule **ru)
				815	{
				816	if (*ru)
				817	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	818	rule_destroy (&(**ru).next);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	819	spec_destroy (&(**ru).m_specs);
				820	mem_free ((void **) ru);
				821	}
				822	}
				823
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	824	GRAMMAR_IMPLEMENT_LIST_APPEND(rule)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	825
				826	/*
				827	returns unique grammar id
				828	*/
				829	static grammar next_valid_grammar_id ()
				830	{
				831	static grammar id = 0;
				832
				833	return ++id;
				834	}
				835
				836	/*
				837	dictionary typedef
				838	*/
				839	typedef struct dict_
				840	{
				841	rule *m_rulez;
				842	rule *m_syntax;
				843	rule *m_string;
				844	map_byte *m_regbytes;
				845	grammar m_id;
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	846	struct dict_ *next;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	847	} dict;
				848
				849	static void dict_create (dict **di)
				850	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	851	di = (dict ) mem_alloc (sizeof (dict));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	852	if (*di)
				853	{
				854	(**di).m_rulez = NULL;
				855	(**di).m_syntax = NULL;
				856	(**di).m_string = NULL;
				857	(**di).m_regbytes = NULL;
				858	(**di).m_id = next_valid_grammar_id ();
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	859	(**di).next = NULL;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	860	}
				861	}
				862
				863	static void dict_destroy (dict **di)
				864	{
				865	if (*di)
				866	{
				867	rule_destroy (&(**di).m_rulez);
				868	map_byte_destroy (&(**di).m_regbytes);
				869	mem_free ((void **) di);
				870	}
				871	}
				872
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	873	GRAMMAR_IMPLEMENT_LIST_APPEND(dict)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	874
				875	static void dict_find (dict di, grammar key, dict data)
				876	{
				877	while (*di)
				878	{
				879	if ((**di).m_id == key)
				880	{
				881	data = di;
				882	return;
				883	}
				884
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	885	di = &(**di).next;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	886	}
				887
				888	*data = NULL;
				889	}
				890
				891	static dict *g_dicts = NULL;
				892
				893	/*
				894	byte array typedef
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	895	*/
				896	typedef struct barray_
				897	{
				898	byte *data;
				899	unsigned int len;
				900	} barray;
				901
				902	static void barray_create (barray **ba)
				903	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	904	ba = (barray ) mem_alloc (sizeof (barray));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	905	if (*ba)
				906	{
				907	(**ba).data = NULL;
				908	(**ba).len = 0;
				909	}
				910	}
				911
				912	static void barray_destroy (barray **ba)
				913	{
				914	if (*ba)
				915	{
				916	mem_free ((void ) &(ba).data);
				917	mem_free ((void **) ba);
				918	}
				919	}
				920
				921	/*
				922	reallocates byte array to requested size,
				923	returns 0 on success,
				924	returns 1 otherwise
				925	*/
				926	static int barray_resize (barray **ba, unsigned int nlen)
				927	{
				928	byte *new_pointer;
				929
				930	if (nlen == 0)
				931	{
				932	mem_free ((void ) &(ba).data);
				933	(**ba).data = NULL;
				934	(**ba).len = 0;
				935
				936	return 0;
				937	}
				938	else
				939	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	940	new_pointer = (byte ) mem_realloc ((ba).data, (ba).len sizeof (byte),
				941	nlen * sizeof (byte));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	942	if (new_pointer)
				943	{
				944	(**ba).data = new_pointer;
				945	(**ba).len = nlen;
				946
				947	return 0;
				948	}
				949	}
				950
				951	return 1;
				952	}
				953
				954	/*
				955	adds byte array pointed by nb to the end of array pointed by ba,
				956	returns 0 on success,
				957	returns 1 otherwise
				958	*/
				959	static int barray_append (barray ba, barray nb)
				960	{
				961	const unsigned int len = (**ba).len;
				962
				963	if (barray_resize (ba, (ba).len + (nb).len))
				964	return 1;
				965
				966	mem_copy ((ba).data + len, (nb).data, (**nb).len);
				967
				968	return 0;
				969	}
				970
				971	/*
				972	adds emit chain pointed by em to the end of array pointed by *ba,
				973	returns 0 on success,
				974	returns 1 otherwise
				975	*/
				976	static int barray_push (barray *ba, emit em, byte c, unsigned int pos, regbyte_ctx **rbc)
				977	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	978	unsigned int count = emit_size (em);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	979
				980	if (barray_resize (ba, (**ba).len + count))
				981	return 1;
				982
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	983	return emit_push (em, (ba).data + ((ba).len - count), c, pos, rbc);
				984	}
				985
				986	/*
				987	byte pool typedef
				988	*/
				989	typedef struct bytepool_
				990	{
				991	byte *_F;
				992	unsigned int _Siz;
				993	} bytepool;
				994
				995	static void bytepool_destroy (bytepool **by)
				996	{
				997	if (*by != NULL)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	998	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	999	mem_free ((void ) &(by)._F);
				1000	mem_free ((void **) by);
				1001	}
				1002	}
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1003
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1004	static void bytepool_create (bytepool **by, int len)
				1005	{
				1006	by = (bytepool ) (mem_alloc (sizeof (bytepool)));
				1007	if (*by != NULL)
				1008	{
				1009	(*by)._F = (byte ) (mem_alloc (sizeof (byte) * len));
				1010	(**by)._Siz = len;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1011
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1012	if ((**by)._F == NULL)
				1013	bytepool_destroy (by);
				1014	}
				1015	}
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1016
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1017	static int bytepool_reserve (bytepool *by, unsigned int _N)
				1018	{
				1019	byte *_P;
				1020
				1021	if (_N <= by->_Siz)
				1022	return 0;
				1023
				1024	/* byte pool can only grow and at least by doubling its size */
				1025	_N = _N >= by->_Siz * 2 ? _N : by->_Siz * 2;
				1026
				1027	/* reallocate the memory and adjust pointers to the new memory location */
				1028	_P = (byte ) (mem_realloc (by->_F, sizeof (byte) by->_Siz, sizeof (byte) * _N));
				1029	if (_P != NULL)
				1030	{
				1031	by->_F = _P;
				1032	by->_Siz = _N;
				1033	return 0;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1034	}
				1035
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1036	return 1;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1037	}
				1038
				1039	/*
				1040	string to string map typedef
				1041	*/
				1042	typedef struct map_str_
				1043	{
				1044	byte *key;
				1045	byte *data;
				1046	struct map_str_ *next;
				1047	} map_str;
				1048
				1049	static void map_str_create (map_str **ma)
				1050	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	1051	ma = (map_str ) mem_alloc (sizeof (map_str));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1052	if (*ma)
				1053	{
				1054	(**ma).key = NULL;
				1055	(**ma).data = NULL;
				1056	(**ma).next = NULL;
				1057	}
				1058	}
				1059
				1060	static void map_str_destroy (map_str **ma)
				1061	{
				1062	if (*ma)
				1063	{
				1064	map_str_destroy (&(**ma).next);
				1065	mem_free ((void ) &(ma).key);
				1066	mem_free ((void ) &(ma).data);
				1067	mem_free ((void **) ma);
				1068	}
				1069	}
				1070
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1071	GRAMMAR_IMPLEMENT_LIST_APPEND(map_str)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1072
				1073	/*
				1074	searches the map for specified key,
				1075	if the key is matched, *data is filled with data associated with the key,
				1076	returns 0 if the key is matched,
				1077	returns 1 otherwise
				1078	*/
				1079	static int map_str_find (map_str *ma, const byte key, byte **data)
				1080	{
				1081	while (*ma)
				1082	{
				1083	if (str_equal ((**ma).key, key))
				1084	{
				1085	data = str_duplicate ((*ma).data);
				1086	if (*data == NULL)
				1087	return 1;
				1088
				1089	return 0;
				1090	}
				1091
				1092	ma = &(**ma).next;
				1093	}
				1094
				1095	set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
				1096	return 1;
				1097	}
				1098
				1099	/*
				1100	string to rule map typedef
				1101	*/
				1102	typedef struct map_rule_
				1103	{
				1104	byte *key;
				1105	rule *data;
				1106	struct map_rule_ *next;
				1107	} map_rule;
				1108
				1109	static void map_rule_create (map_rule **ma)
				1110	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	1111	ma = (map_rule ) mem_alloc (sizeof (map_rule));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1112	if (*ma)
				1113	{
				1114	(**ma).key = NULL;
				1115	(**ma).data = NULL;
				1116	(**ma).next = NULL;
				1117	}
				1118	}
				1119
				1120	static void map_rule_destroy (map_rule **ma)
				1121	{
				1122	if (*ma)
				1123	{
				1124	map_rule_destroy (&(**ma).next);
				1125	mem_free ((void ) &(ma).key);
				1126	mem_free ((void **) ma);
				1127	}
				1128	}
				1129
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1130	GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1131
				1132	/*
				1133	searches the map for specified key,
				1134	if the key is matched, *data is filled with data associated with the key,
				1135	returns 0 if the is matched,
				1136	returns 1 otherwise
				1137	*/
				1138	static int map_rule_find (map_rule *ma, const byte key, rule **data)
				1139	{
				1140	while (*ma)
				1141	{
				1142	if (str_equal ((**ma).key, key))
				1143	{
				1144	data = (*ma).data;
				1145
				1146	return 0;
				1147	}
				1148
				1149	ma = &(**ma).next;
				1150	}
				1151
				1152	set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
				1153	return 1;
				1154	}
				1155
				1156	/*
				1157	returns 1 if given character is a white space,
				1158	returns 0 otherwise
				1159	*/
				1160	static int is_space (byte c)
				1161	{
				1162	return c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\r';
				1163	}
				1164
				1165	/*
				1166	advances text pointer by 1 if character pointed by *text is a space,
				1167	returns 1 if a space has been eaten,
				1168	returns 0 otherwise
				1169	*/
				1170	static int eat_space (const byte **text)
				1171	{
				1172	if (is_space (**text))
				1173	{
				1174	(*text)++;
				1175
				1176	return 1;
				1177	}
				1178
				1179	return 0;
				1180	}
				1181
				1182	/*
Brian Paul	788461f	2004-03-29 14:53:49 +0000	[diff] [blame]	1183	returns 1 if text points to C-style comment start string,
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1184	returns 0 otherwise
				1185	*/
				1186	static int is_comment_start (const byte *text)
				1187	{
				1188	return text[0] == '/' && text[1] == '*';
				1189	}
				1190
				1191	/*
				1192	advances text pointer to first character after C-style comment block - if any,
				1193	returns 1 if C-style comment block has been encountered and eaten,
				1194	returns 0 otherwise
				1195	*/
				1196	static int eat_comment (const byte **text)
				1197	{
				1198	if (is_comment_start (*text))
				1199	{
				1200	/* text points to comment block - skip two characters to enter comment body /
				1201	*text += 2;
				1202	/* skip any character except consecutive '' and '/' /
				1203	while (!((text)[0] == '' && (*text)[1] == '/'))
				1204	(*text)++;
				1205	/* skip those two terminating characters */
				1206	*text += 2;
				1207
				1208	return 1;
				1209	}
				1210
				1211	return 0;
				1212	}
				1213
				1214	/*
				1215	advances text pointer to first character that is neither space nor C-style comment block
				1216	*/
				1217	static void eat_spaces (const byte **text)
				1218	{
				1219	while (eat_space (text) \|\| eat_comment (text))
				1220	;
				1221	}
				1222
				1223	/*
				1224	resizes string pointed by *ptr to successfully add character c to the end of the string,
				1225	returns 0 on success,
				1226	returns 1 otherwise
				1227	*/
				1228	static int string_grow (byte *ptr, unsigned int len, byte c)
				1229	{
				1230	/* reallocate the string in 16-byte increments */
				1231	if ((len & 0x0F) == 0x0F \|\| ptr == NULL)
				1232	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	1233	byte tmp = (byte ) mem_realloc (ptr, ((len + 1) & ~0x0F) * sizeof (byte),
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1234	((len + 1 + 0x10) & ~0x0F) sizeof (byte));
				1235	if (tmp == NULL)
				1236	return 1;
				1237
				1238	*ptr = tmp;
				1239	}
				1240
				1241	if (c)
				1242	{
				1243	/* append given character */
				1244	(ptr)[len] = c;
				1245	(*len)++;
				1246	}
				1247	(ptr)[len] = '\0';
				1248
				1249	return 0;
				1250	}
				1251
				1252	/*
				1253	returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
				1254	returns 0 otherwise
				1255	*/
				1256	static int is_identifier (byte c)
				1257	{
				1258	return (c >= 'a' && c <= 'z') \|\| (c >= 'A' && c <= 'Z') \|\| (c >= '0' && c <= '9') \|\| c == '_';
				1259	}
				1260
				1261	/*
				1262	copies characters from text to id until non-identifier character is encountered,
				1263	assumes that *id points to NULL object - caller is responsible for later freeing the string,
				1264	text pointer is advanced to point past the copied identifier,
				1265	returns 0 if identifier was successfully copied,
				1266	returns 1 otherwise
				1267	*/
				1268	static int get_identifier (const byte text, byte id)
				1269	{
				1270	const byte t = text;
				1271	byte *p = NULL;
				1272	unsigned int len = 0;
				1273
				1274	if (string_grow (&p, &len, '\0'))
				1275	return 1;
				1276
				1277	/* loop while next character in buffer is valid for identifiers */
				1278	while (is_identifier (*t))
				1279	{
				1280	if (string_grow (&p, &len, *t++))
				1281	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1282	mem_free ((void *) (void ) &p);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1283	return 1;
				1284	}
				1285	}
				1286
				1287	*text = t;
				1288	*id = p;
				1289
				1290	return 0;
				1291	}
				1292
				1293	/*
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1294	converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
				1295	advances text pointer past the converted sequence,
				1296	returns the converted value
				1297	*/
				1298	static unsigned int dec_convert (const byte **text)
				1299	{
				1300	unsigned int value = 0;
				1301
				1302	while (text >= '0' && text <= '9')
				1303	{
				1304	value = value * 10 + **text - '0';
				1305	(*text)++;
				1306	}
				1307
				1308	return value;
				1309	}
				1310
				1311	/*
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1312	returns 1 if given character is HEX digit 0-9, A-F or a-f,
				1313	returns 0 otherwise
				1314	*/
				1315	static int is_hex (byte c)
				1316	{
				1317	return (c >= '0' && c <= '9') \|\| (c >= 'A' && c <= 'F') \|\| (c >= 'a' && c <= 'f');
				1318	}
				1319
				1320	/*
				1321	returns value of passed character as if it was HEX digit
				1322	*/
				1323	static unsigned int hex2dec (byte c)
				1324	{
				1325	if (c >= '0' && c <= '9')
				1326	return c - '0';
				1327	if (c >= 'A' && c <= 'F')
				1328	return c - 'A' + 10;
				1329	return c - 'a' + 10;
				1330	}
				1331
				1332	/*
				1333	converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
				1334	advances text pointer past the converted sequence,
				1335	returns the converted value
				1336	*/
				1337	static unsigned int hex_convert (const byte **text)
				1338	{
				1339	unsigned int value = 0;
				1340
				1341	while (is_hex (**text))
				1342	{
				1343	value = value * 0x10 + hex2dec (**text);
				1344	(*text)++;
				1345	}
				1346
				1347	return value;
				1348	}
				1349
				1350	/*
				1351	returns 1 if given character is OCT digit 0-7,
				1352	returns 0 otherwise
				1353	*/
				1354	static int is_oct (byte c)
				1355	{
				1356	return c >= '0' && c <= '7';
				1357	}
				1358
				1359	/*
				1360	returns value of passed character as if it was OCT digit
				1361	*/
				1362	static int oct2dec (byte c)
				1363	{
				1364	return c - '0';
				1365	}
				1366
				1367	static byte get_escape_sequence (const byte **text)
				1368	{
				1369	int value = 0;
				1370
				1371	/* skip '\' character */
				1372	(*text)++;
				1373
				1374	switch ((text)++)
				1375	{
				1376	case '\'':
				1377	return '\'';
				1378	case '"':
				1379	return '\"';
				1380	case '?':
				1381	return '\?';
				1382	case '\\':
				1383	return '\\';
				1384	case 'a':
				1385	return '\a';
				1386	case 'b':
				1387	return '\b';
				1388	case 'f':
				1389	return '\f';
				1390	case 'n':
				1391	return '\n';
				1392	case 'r':
				1393	return '\r';
				1394	case 't':
				1395	return '\t';
				1396	case 'v':
				1397	return '\v';
				1398	case 'x':
				1399	return (byte) hex_convert (text);
				1400	}
				1401
				1402	(*text)--;
				1403	if (is_oct (**text))
				1404	{
				1405	value = oct2dec ((text)++);
				1406	if (is_oct (**text))
				1407	{
				1408	value = value * 010 + oct2dec ((text)++);
				1409	if (is_oct (**text))
				1410	value = value * 010 + oct2dec ((text)++);
				1411	}
				1412	}
				1413
				1414	return (byte) value;
				1415	}
				1416
				1417	/*
				1418	copies characters from text to str until " or ' character is encountered,
				1419	assumes that *str points to NULL object - caller is responsible for later freeing the string,
				1420	assumes that *text points to " or ' character that starts the string,
				1421	text pointer is advanced to point past the " or ' character,
				1422	returns 0 if string was successfully copied,
				1423	returns 1 otherwise
				1424	*/
				1425	static int get_string (const byte text, byte str)
				1426	{
				1427	const byte t = text;
				1428	byte *p = NULL;
				1429	unsigned int len = 0;
				1430	byte term_char;
				1431
				1432	if (string_grow (&p, &len, '\0'))
				1433	return 1;
				1434
				1435	/* read " or ' character that starts the string */
				1436	term_char = *t++;
				1437	/* while next character is not the terminating character */
				1438	while (t && t != term_char)
				1439	{
				1440	byte c;
				1441
				1442	if (*t == '\\')
				1443	c = get_escape_sequence (&t);
				1444	else
				1445	c = *t++;
				1446
				1447	if (string_grow (&p, &len, c))
				1448	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1449	mem_free ((void *) (void ) &p);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1450	return 1;
				1451	}
				1452	}
				1453	/* skip " or ' character that ends the string */
				1454	t++;
				1455
				1456	*text = t;
				1457	*str = p;
				1458	return 0;
				1459	}
				1460
				1461	/*
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1462	gets emit code, the syntax is:
				1463	".emtcode" " " <symbol> " " (("0x" \| "0X") <hex_value>) \| <dec_value> \| <character>
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1464	assumes that *text already points to <symbol>,
				1465	returns 0 if emit code is successfully read,
				1466	returns 1 otherwise
				1467	*/
				1468	static int get_emtcode (const byte text, map_byte ma)
				1469	{
				1470	const byte t = text;
				1471	map_byte *m = NULL;
				1472
				1473	map_byte_create (&m);
				1474	if (m == NULL)
				1475	return 1;
				1476
				1477	if (get_identifier (&t, &m->key))
				1478	{
				1479	map_byte_destroy (&m);
				1480	return 1;
				1481	}
				1482	eat_spaces (&t);
				1483
				1484	if (*t == '\'')
				1485	{
				1486	byte *c;
				1487
				1488	if (get_string (&t, &c))
				1489	{
				1490	map_byte_destroy (&m);
				1491	return 1;
				1492	}
				1493
				1494	m->data = (byte) c[0];
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1495	mem_free ((void *) (void ) &c);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1496	}
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1497	else if (t[0] == '0' && (t[1] == 'x' \|\| t[1] == 'X'))
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1498	{
				1499	/* skip HEX "0x" or "0X" prefix */
				1500	t += 2;
				1501	m->data = (byte) hex_convert (&t);
				1502	}
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1503	else
				1504	{
				1505	m->data = (byte) dec_convert (&t);
				1506	}
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1507
				1508	eat_spaces (&t);
				1509
				1510	*text = t;
				1511	*ma = m;
				1512	return 0;
				1513	}
				1514
				1515	/*
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1516	gets regbyte declaration, the syntax is:
				1517	".regbyte" " " <symbol> " " (("0x" \| "0X") <hex_value>) \| <dec_value> \| <character>
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1518	assumes that *text already points to <symbol>,
				1519	returns 0 if regbyte is successfully read,
				1520	returns 1 otherwise
				1521	*/
				1522	static int get_regbyte (const byte text, map_byte ma)
				1523	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1524	/* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1525	return get_emtcode (text, ma);
				1526	}
				1527
				1528	/*
				1529	returns 0 on success,
				1530	returns 1 otherwise
				1531	*/
				1532	static int get_errtext (const byte text, map_str ma)
				1533	{
				1534	const byte t = text;
				1535	map_str *m = NULL;
				1536
				1537	map_str_create (&m);
				1538	if (m == NULL)
				1539	return 1;
				1540
				1541	if (get_identifier (&t, &m->key))
				1542	{
				1543	map_str_destroy (&m);
				1544	return 1;
				1545	}
				1546	eat_spaces (&t);
				1547
				1548	if (get_string (&t, &m->data))
				1549	{
				1550	map_str_destroy (&m);
				1551	return 1;
				1552	}
				1553	eat_spaces (&t);
				1554
				1555	*text = t;
				1556	*ma = m;
				1557	return 0;
				1558	}
				1559
				1560	/*
				1561	returns 0 on success,
				1562	returns 1 otherwise,
				1563	*/
				1564	static int get_error (const byte text, error er, map_str *maps)
				1565	{
				1566	const byte t = text;
				1567	byte *temp = NULL;
				1568
				1569	if (*t != '.')
				1570	return 0;
				1571
				1572	t++;
				1573	if (get_identifier (&t, &temp))
				1574	return 1;
				1575	eat_spaces (&t);
				1576
				1577	if (!str_equal ((byte *) "error", temp))
				1578	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1579	mem_free ((void *) (void ) &temp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1580	return 0;
				1581	}
				1582
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1583	mem_free ((void *) (void ) &temp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1584
				1585	error_create (er);
				1586	if (*er == NULL)
				1587	return 1;
				1588
				1589	if (*t == '\"')
				1590	{
				1591	if (get_string (&t, &(**er).m_text))
				1592	{
				1593	error_destroy (er);
				1594	return 1;
				1595	}
				1596	eat_spaces (&t);
				1597	}
				1598	else
				1599	{
				1600	if (get_identifier (&t, &temp))
				1601	{
				1602	error_destroy (er);
				1603	return 1;
				1604	}
				1605	eat_spaces (&t);
				1606
				1607	if (map_str_find (&maps, temp, &(**er).m_text))
				1608	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1609	mem_free ((void *) (void ) &temp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1610	error_destroy (er);
				1611	return 1;
				1612	}
				1613
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1614	mem_free ((void *) (void ) &temp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1615	}
				1616
				1617	/* try to extract "token" from "...$token$..." */
				1618	{
				1619	byte *processed = NULL;
				1620	unsigned int len = 0, i = 0;
				1621
				1622	if (string_grow (&processed, &len, '\0'))
				1623	{
				1624	error_destroy (er);
				1625	return 1;
				1626	}
				1627
				1628	while (i < str_length ((**er).m_text))
				1629	{
				1630	/* check if the dollar sign is repeated - if so skip it */
				1631	if ((er).m_text[i] == '$' && (er).m_text[i + 1] == '$')
				1632	{
				1633	if (string_grow (&processed, &len, '$'))
				1634	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1635	mem_free ((void *) (void ) &processed);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1636	error_destroy (er);
				1637	return 1;
				1638	}
				1639
				1640	i += 2;
				1641	}
				1642	else if ((**er).m_text[i] != '$')
				1643	{
				1644	if (string_grow (&processed, &len, (**er).m_text[i]))
				1645	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1646	mem_free ((void *) (void ) &processed);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1647	error_destroy (er);
				1648	return 1;
				1649	}
				1650
				1651	i++;
				1652	}
				1653	else
				1654	{
				1655	if (string_grow (&processed, &len, '$'))
				1656	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1657	mem_free ((void *) (void ) &processed);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1658	error_destroy (er);
				1659	return 1;
				1660	}
				1661
				1662	{
				1663	/* length of token being extracted */
				1664	unsigned int tlen = 0;
				1665
				1666	if (string_grow (&(**er).m_token_name, &tlen, '\0'))
				1667	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1668	mem_free ((void *) (void ) &processed);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1669	error_destroy (er);
				1670	return 1;
				1671	}
				1672
				1673	/* skip the dollar sign */
				1674	i++;
				1675
				1676	while ((**er).m_text[i] != '$')
				1677	{
				1678	if (string_grow (&(er).m_token_name, &tlen, (er).m_text[i]))
				1679	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1680	mem_free ((void *) (void ) &processed);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1681	error_destroy (er);
				1682	return 1;
				1683	}
				1684
				1685	i++;
				1686	}
				1687
				1688	/* skip the dollar sign */
				1689	i++;
				1690	}
				1691	}
				1692	}
				1693
				1694	mem_free ((void ) &(er).m_text);
				1695	(**er).m_text = processed;
				1696	}
				1697
				1698	*text = t;
				1699	return 0;
				1700	}
				1701
				1702	/*
				1703	returns 0 on success,
				1704	returns 1 otherwise,
				1705	*/
				1706	static int get_emits (const byte text, emit em, map_byte *mapb)
				1707	{
				1708	const byte t = text;
				1709	byte *temp = NULL;
				1710	emit *e = NULL;
				1711	emit_dest dest;
				1712
				1713	if (*t != '.')
				1714	return 0;
				1715
				1716	t++;
				1717	if (get_identifier (&t, &temp))
				1718	return 1;
				1719	eat_spaces (&t);
				1720
				1721	/* .emit */
				1722	if (str_equal ((byte *) "emit", temp))
				1723	dest = ed_output;
				1724	/* .load */
				1725	else if (str_equal ((byte *) "load", temp))
				1726	dest = ed_regbyte;
				1727	else
				1728	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1729	mem_free ((void *) (void ) &temp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1730	return 0;
				1731	}
				1732
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1733	mem_free ((void *) (void ) &temp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1734
				1735	emit_create (&e);
				1736	if (e == NULL)
				1737	return 1;
				1738
				1739	e->m_emit_dest = dest;
				1740
				1741	if (dest == ed_regbyte)
				1742	{
				1743	if (get_identifier (&t, &e->m_regname))
				1744	{
				1745	emit_destroy (&e);
				1746	return 1;
				1747	}
				1748	eat_spaces (&t);
				1749	}
				1750
				1751	/* 0xNN */
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1752	if (*t == '0' && (t[1] == 'x' \|\| t[1] == 'X'))
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1753	{
				1754	t += 2;
				1755	e->m_byte = (byte) hex_convert (&t);
				1756
				1757	e->m_emit_type = et_byte;
				1758	}
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1759	/* NNN */
				1760	else if (t >= '0' && t <= '9')
				1761	{
				1762	e->m_byte = (byte) dec_convert (&t);
				1763
				1764	e->m_emit_type = et_byte;
				1765	}
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1766	/* * */
				1767	else if (t == '')
				1768	{
				1769	t++;
				1770
				1771	e->m_emit_type = et_stream;
				1772	}
				1773	/* $ */
				1774	else if (*t == '$')
				1775	{
				1776	t++;
				1777
				1778	e->m_emit_type = et_position;
				1779	}
				1780	/* 'c' */
				1781	else if (*t == '\'')
				1782	{
				1783	if (get_string (&t, &temp))
				1784	{
				1785	emit_destroy (&e);
				1786	return 1;
				1787	}
				1788	e->m_byte = (byte) temp[0];
				1789
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1790	mem_free ((void *) (void ) &temp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1791
				1792	e->m_emit_type = et_byte;
				1793	}
				1794	else
				1795	{
				1796	if (get_identifier (&t, &temp))
				1797	{
				1798	emit_destroy (&e);
				1799	return 1;
				1800	}
				1801
				1802	if (map_byte_find (&mapb, temp, &e->m_byte))
				1803	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1804	mem_free ((void *) (void ) &temp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1805	emit_destroy (&e);
				1806	return 1;
				1807	}
				1808
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1809	mem_free ((void *) (void ) &temp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1810
				1811	e->m_emit_type = et_byte;
				1812	}
				1813
				1814	eat_spaces (&t);
				1815
				1816	if (get_emits (&t, &e->m_next, mapb))
				1817	{
				1818	emit_destroy (&e);
				1819	return 1;
				1820	}
				1821
				1822	*text = t;
				1823	*em = e;
				1824	return 0;
				1825	}
				1826
				1827	/*
				1828	returns 0 on success,
				1829	returns 1 otherwise,
				1830	*/
				1831	static int get_spec (const byte text, spec sp, map_str maps, map_byte mapb)
				1832	{
				1833	const byte t = text;
				1834	spec *s = NULL;
				1835
				1836	spec_create (&s);
				1837	if (s == NULL)
				1838	return 1;
				1839
				1840	/* first - read optional .if statement */
				1841	if (*t == '.')
				1842	{
				1843	const byte *u = t;
				1844	byte *keyword = NULL;
				1845
				1846	/* skip the dot */
				1847	u++;
				1848
				1849	if (get_identifier (&u, &keyword))
				1850	{
				1851	spec_destroy (&s);
				1852	return 1;
				1853	}
				1854
				1855	/* .if */
				1856	if (str_equal ((byte *) "if", keyword))
				1857	{
				1858	cond_create (&s->m_cond);
				1859	if (s->m_cond == NULL)
				1860	{
				1861	spec_destroy (&s);
				1862	return 1;
				1863	}
				1864
				1865	/* skip the left paren */
				1866	eat_spaces (&u);
				1867	u++;
				1868
				1869	/* get the left operand */
				1870	eat_spaces (&u);
				1871	if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
				1872	{
				1873	spec_destroy (&s);
				1874	return 1;
				1875	}
				1876	s->m_cond->m_operands[0].m_type = cot_regbyte;
				1877
				1878	/* get the operator (!= or ==) */
				1879	eat_spaces (&u);
				1880	if (*u == '!')
				1881	s->m_cond->m_type = ct_not_equal;
				1882	else
				1883	s->m_cond->m_type = ct_equal;
				1884	u += 2;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1885	eat_spaces (&u);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1886
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	1887	if (u[0] == '0' && (u[1] == 'x' \|\| u[1] == 'X'))
				1888	{
				1889	/* skip the 0x prefix */
				1890	u += 2;
				1891
				1892	/* get the right operand */
				1893	s->m_cond->m_operands[1].m_byte = hex_convert (&u);
				1894	s->m_cond->m_operands[1].m_type = cot_byte;
				1895	}
				1896	else /if (u >= '0' && u <= '9')/
				1897	{
				1898	/* get the right operand */
				1899	s->m_cond->m_operands[1].m_byte = dec_convert (&u);
				1900	s->m_cond->m_operands[1].m_type = cot_byte;
				1901	}
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1902
				1903	/* skip the right paren */
				1904	eat_spaces (&u);
				1905	u++;
				1906
				1907	eat_spaces (&u);
				1908
				1909	t = u;
				1910	}
				1911
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1912	mem_free ((void *) (void ) &keyword);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1913	}
				1914
				1915	if (*t == '\'')
				1916	{
				1917	byte *temp = NULL;
				1918
				1919	if (get_string (&t, &temp))
				1920	{
				1921	spec_destroy (&s);
				1922	return 1;
				1923	}
				1924	eat_spaces (&t);
				1925
				1926	if (*t == '-')
				1927	{
				1928	byte *temp2 = NULL;
				1929
				1930	/* skip the '-' character */
				1931	t++;
				1932	eat_spaces (&t);
				1933
				1934	if (get_string (&t, &temp2))
				1935	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1936	mem_free ((void *) (void ) &temp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1937	spec_destroy (&s);
				1938	return 1;
				1939	}
				1940	eat_spaces (&t);
				1941
				1942	s->m_spec_type = st_byte_range;
				1943	s->m_byte[0] = *temp;
				1944	s->m_byte[1] = *temp2;
				1945
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1946	mem_free ((void *) (void ) &temp2);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1947	}
				1948	else
				1949	{
				1950	s->m_spec_type = st_byte;
				1951	s->m_byte = temp;
				1952	}
				1953
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	1954	mem_free ((void *) (void ) &temp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	1955	}
				1956	else if (*t == '"')
				1957	{
				1958	if (get_string (&t, &s->m_string))
				1959	{
				1960	spec_destroy (&s);
				1961	return 1;
				1962	}
				1963	eat_spaces (&t);
				1964
				1965	s->m_spec_type = st_string;
				1966	}
				1967	else if (*t == '.')
				1968	{
				1969	byte *keyword = NULL;
				1970
				1971	/* skip the dot */
				1972	t++;
				1973
				1974	if (get_identifier (&t, &keyword))
				1975	{
				1976	spec_destroy (&s);
				1977	return 1;
				1978	}
				1979	eat_spaces (&t);
				1980
				1981	/* .true */
				1982	if (str_equal ((byte *) "true", keyword))
				1983	{
				1984	s->m_spec_type = st_true;
				1985	}
				1986	/* .false */
				1987	else if (str_equal ((byte *) "false", keyword))
				1988	{
				1989	s->m_spec_type = st_false;
				1990	}
				1991	/* .debug */
				1992	else if (str_equal ((byte *) "debug", keyword))
				1993	{
				1994	s->m_spec_type = st_debug;
				1995	}
				1996	/* .loop */
				1997	else if (str_equal ((byte *) "loop", keyword))
				1998	{
				1999	if (get_identifier (&t, &s->m_string))
				2000	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	2001	mem_free ((void *) (void ) &keyword);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2002	spec_destroy (&s);
				2003	return 1;
				2004	}
				2005	eat_spaces (&t);
				2006
				2007	s->m_spec_type = st_identifier_loop;
				2008	}
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	2009	mem_free ((void *) (void ) &keyword);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2010	}
				2011	else
				2012	{
				2013	if (get_identifier (&t, &s->m_string))
				2014	{
				2015	spec_destroy (&s);
				2016	return 1;
				2017	}
				2018	eat_spaces (&t);
				2019
				2020	s->m_spec_type = st_identifier;
				2021	}
				2022
				2023	if (get_error (&t, &s->m_errtext, maps))
				2024	{
				2025	spec_destroy (&s);
				2026	return 1;
				2027	}
				2028
				2029	if (get_emits (&t, &s->m_emits, mapb))
				2030	{
				2031	spec_destroy (&s);
				2032	return 1;
				2033	}
				2034
				2035	*text = t;
				2036	*sp = s;
				2037	return 0;
				2038	}
				2039
				2040	/*
				2041	returns 0 on success,
				2042	returns 1 otherwise,
				2043	*/
				2044	static int get_rule (const byte text, rule ru, map_str maps, map_byte mapb)
				2045	{
				2046	const byte t = text;
				2047	rule *r = NULL;
				2048
				2049	rule_create (&r);
				2050	if (r == NULL)
				2051	return 1;
				2052
				2053	if (get_spec (&t, &r->m_specs, maps, mapb))
				2054	{
				2055	rule_destroy (&r);
				2056	return 1;
				2057	}
				2058
				2059	while (*t != ';')
				2060	{
				2061	byte *op = NULL;
				2062	spec *sp = NULL;
				2063
				2064	/* skip the dot that precedes "and" or "or" */
				2065	t++;
				2066
				2067	/* read "and" or "or" keyword */
				2068	if (get_identifier (&t, &op))
				2069	{
				2070	rule_destroy (&r);
				2071	return 1;
				2072	}
				2073	eat_spaces (&t);
				2074
				2075	if (r->m_oper == op_none)
				2076	{
				2077	/* .and */
				2078	if (str_equal ((byte *) "and", op))
				2079	r->m_oper = op_and;
				2080	/* .or */
				2081	else
				2082	r->m_oper = op_or;
				2083	}
				2084
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	2085	mem_free ((void *) (void ) &op);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2086
				2087	if (get_spec (&t, &sp, maps, mapb))
				2088	{
				2089	rule_destroy (&r);
				2090	return 1;
				2091	}
				2092
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2093	spec_append (&r->m_specs, sp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2094	}
				2095
				2096	/* skip the semicolon */
				2097	t++;
				2098	eat_spaces (&t);
				2099
				2100	*text = t;
				2101	*ru = r;
				2102	return 0;
				2103	}
				2104
				2105	/*
				2106	returns 0 on success,
				2107	returns 1 otherwise,
				2108	*/
				2109	static int update_dependency (map_rule mapr, byte symbol, rule **ru)
				2110	{
				2111	if (map_rule_find (&mapr, symbol, ru))
				2112	return 1;
				2113
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2114	(**ru).m_referenced = 1;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2115
				2116	return 0;
				2117	}
				2118
				2119	/*
				2120	returns 0 on success,
				2121	returns 1 otherwise,
				2122	*/
				2123	static int update_dependencies (dict di, map_rule mapr, byte **syntax_symbol,
				2124	byte *string_symbol, map_byte regbytes)
				2125	{
				2126	rule *rulez = di->m_rulez;
				2127
				2128	/* update dependecies for the root and lexer symbols */
				2129	if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) \|\|
				2130	(string_symbol != NULL && update_dependency (mapr, string_symbol, &di->m_string)))
				2131	return 1;
				2132
				2133	mem_free ((void **) syntax_symbol);
				2134	mem_free ((void **) string_symbol);
				2135
				2136	/* update dependecies for the rest of the rules */
				2137	while (rulez)
				2138	{
				2139	spec *sp = rulez->m_specs;
				2140
				2141	/* iterate through all the specifiers */
				2142	while (sp)
				2143	{
				2144	/* update dependency for identifier */
				2145	if (sp->m_spec_type == st_identifier \|\| sp->m_spec_type == st_identifier_loop)
				2146	{
				2147	if (update_dependency (mapr, sp->m_string, &sp->m_rule))
				2148	return 1;
				2149
				2150	mem_free ((void **) &sp->m_string);
				2151	}
				2152
				2153	/* some errtexts reference to a rule */
				2154	if (sp->m_errtext && sp->m_errtext->m_token_name)
				2155	{
				2156	if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
				2157	return 1;
				2158
				2159	mem_free ((void **) &sp->m_errtext->m_token_name);
				2160	}
				2161
				2162	/* update dependency for condition */
				2163	if (sp->m_cond)
				2164	{
				2165	int i;
				2166	for (i = 0; i < 2; i++)
				2167	if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
				2168	{
				2169	sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
				2170	sp->m_cond->m_operands[i].m_regname);
				2171
				2172	if (sp->m_cond->m_operands[i].m_regbyte == NULL)
				2173	return 1;
				2174
				2175	mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
				2176	}
				2177	}
				2178
				2179	/* update dependency for all .load instructions */
				2180	if (sp->m_emits)
				2181	{
				2182	emit *em = sp->m_emits;
				2183	while (em != NULL)
				2184	{
				2185	if (em->m_emit_dest == ed_regbyte)
				2186	{
				2187	em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
				2188
				2189	if (em->m_regbyte == NULL)
				2190	return 1;
				2191
				2192	mem_free ((void **) &em->m_regname);
				2193	}
				2194
				2195	em = em->m_next;
				2196	}
				2197	}
				2198
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2199	sp = sp->next;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2200	}
				2201
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2202	rulez = rulez->next;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2203	}
				2204
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2205	/* check for unreferenced symbols */
				2206	rulez = di->m_rulez;
				2207	while (rulez != NULL)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2208	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2209	if (!rulez->m_referenced)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2210	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2211	map_rule *ma = mapr;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2212	while (ma)
				2213	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2214	if (ma->data == rulez)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2215	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2216	set_last_error (UNREFERENCED_IDENTIFIER, str_duplicate (ma->key), -1);
				2217	return 1;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2218	}
				2219	ma = ma->next;
				2220	}
				2221	}
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2222	rulez = rulez->next;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2223	}
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2224
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2225	return 0;
				2226	}
				2227
				2228	static int satisfies_condition (cond co, regbyte_ctx ctx)
				2229	{
				2230	byte values[2];
				2231	int i;
				2232
				2233	if (co == NULL)
				2234	return 1;
				2235
				2236	for (i = 0; i < 2; i++)
				2237	switch (co->m_operands[i].m_type)
				2238	{
				2239	case cot_byte:
				2240	values[i] = co->m_operands[i].m_byte;
				2241	break;
				2242	case cot_regbyte:
				2243	values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
				2244	break;
				2245	}
				2246
				2247	switch (co->m_type)
				2248	{
				2249	case ct_equal:
				2250	return values[0] == values[1];
				2251	case ct_not_equal:
				2252	return values[0] != values[1];
				2253	}
				2254
				2255	return 0;
				2256	}
				2257
				2258	static void free_regbyte_ctx_stack (regbyte_ctx top, regbyte_ctx limit)
				2259	{
				2260	while (top != limit)
				2261	{
				2262	regbyte_ctx *rbc = top->m_prev;
				2263	regbyte_ctx_destroy (&top);
				2264	top = rbc;
				2265	}
				2266	}
				2267
				2268	typedef enum match_result_
				2269	{
				2270	mr_not_matched, /* the examined string does not match */
				2271	mr_matched, /* the examined string matches */
				2272	mr_error_raised, /* mr_not_matched + error has been raised */
				2273	mr_dont_emit, /* used by identifier loops only */
				2274	mr_internal_error /* an internal error has occured such as out of memory */
				2275	} match_result;
				2276
				2277	/*
				2278	This function does the main job. It parses the text and generates output data.
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2279	*/
				2280	static match_result match (dict di, const byte text, unsigned int index, rule ru, barray **ba,
				2281	int filtering_string, regbyte_ctx **rbc)
				2282	{
				2283	unsigned int ind = *index;
				2284	match_result status = mr_not_matched;
				2285	spec *sp = ru->m_specs;
				2286	regbyte_ctx ctx = rbc;
				2287
				2288	/* for every specifier in the rule */
				2289	while (sp)
				2290	{
				2291	unsigned int i, len, save_ind = ind;
				2292	barray *array = NULL;
				2293
				2294	if (satisfies_condition (sp->m_cond, ctx))
				2295	{
				2296	switch (sp->m_spec_type)
				2297	{
				2298	case st_identifier:
				2299	barray_create (&array);
				2300	if (array == NULL)
				2301	{
				2302	free_regbyte_ctx_stack (ctx, *rbc);
				2303	return mr_internal_error;
				2304	}
				2305
				2306	status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2307
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2308	if (status == mr_internal_error)
				2309	{
				2310	free_regbyte_ctx_stack (ctx, *rbc);
				2311	barray_destroy (&array);
				2312	return mr_internal_error;
				2313	}
				2314	break;
				2315	case st_string:
				2316	len = str_length (sp->m_string);
				2317
				2318	/* prefilter the stream */
				2319	if (!filtering_string && di->m_string)
				2320	{
				2321	barray *ba;
				2322	unsigned int filter_index = 0;
				2323	match_result result;
				2324	regbyte_ctx *null_ctx = NULL;
				2325
				2326	barray_create (&ba);
				2327	if (ba == NULL)
				2328	{
				2329	free_regbyte_ctx_stack (ctx, *rbc);
				2330	return mr_internal_error;
				2331	}
				2332
				2333	result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
				2334
				2335	if (result == mr_internal_error)
				2336	{
				2337	free_regbyte_ctx_stack (ctx, *rbc);
				2338	barray_destroy (&ba);
				2339	return mr_internal_error;
				2340	}
				2341
				2342	if (result != mr_matched)
				2343	{
				2344	barray_destroy (&ba);
				2345	status = mr_not_matched;
				2346	break;
				2347	}
				2348
				2349	barray_destroy (&ba);
				2350
				2351	if (filter_index != len \|\| !str_equal_n (sp->m_string, text + ind, len))
				2352	{
				2353	status = mr_not_matched;
				2354	break;
				2355	}
				2356
				2357	status = mr_matched;
				2358	ind += len;
				2359	}
				2360	else
				2361	{
				2362	status = mr_matched;
				2363	for (i = 0; status == mr_matched && i < len; i++)
				2364	if (text[ind + i] != sp->m_string[i])
				2365	status = mr_not_matched;
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2366
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2367	if (status == mr_matched)
				2368	ind += len;
				2369	}
				2370	break;
				2371	case st_byte:
				2372	status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
				2373	if (status == mr_matched)
				2374	ind++;
				2375	break;
				2376	case st_byte_range:
				2377	status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
				2378	mr_matched : mr_not_matched;
				2379	if (status == mr_matched)
				2380	ind++;
				2381	break;
				2382	case st_true:
				2383	status = mr_matched;
				2384	break;
				2385	case st_false:
				2386	status = mr_not_matched;
				2387	break;
				2388	case st_debug:
				2389	status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
				2390	break;
				2391	case st_identifier_loop:
				2392	barray_create (&array);
				2393	if (array == NULL)
				2394	{
				2395	free_regbyte_ctx_stack (ctx, *rbc);
				2396	return mr_internal_error;
				2397	}
				2398
				2399	status = mr_dont_emit;
				2400	for (;;)
				2401	{
				2402	match_result result;
				2403
				2404	save_ind = ind;
				2405	result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
				2406
				2407	if (result == mr_error_raised)
				2408	{
				2409	status = result;
				2410	break;
				2411	}
				2412	else if (result == mr_matched)
				2413	{
				2414	if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) \|\|
				2415	barray_append (ba, &array))
				2416	{
				2417	free_regbyte_ctx_stack (ctx, *rbc);
				2418	barray_destroy (&array);
				2419	return mr_internal_error;
				2420	}
				2421	barray_destroy (&array);
				2422	barray_create (&array);
				2423	if (array == NULL)
				2424	{
				2425	free_regbyte_ctx_stack (ctx, *rbc);
				2426	return mr_internal_error;
				2427	}
				2428	}
				2429	else if (result == mr_internal_error)
				2430	{
				2431	free_regbyte_ctx_stack (ctx, *rbc);
				2432	barray_destroy (&array);
				2433	return mr_internal_error;
				2434	}
				2435	else
				2436	break;
				2437	}
				2438	break;
				2439	}
				2440	}
				2441	else
				2442	{
				2443	status = mr_not_matched;
				2444	}
				2445
				2446	if (status == mr_error_raised)
				2447	{
				2448	free_regbyte_ctx_stack (ctx, *rbc);
				2449	barray_destroy (&array);
				2450
				2451	return mr_error_raised;
				2452	}
				2453
				2454	if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
				2455	{
				2456	free_regbyte_ctx_stack (ctx, *rbc);
				2457	barray_destroy (&array);
				2458
				2459	if (sp->m_errtext)
				2460	{
				2461	set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
				2462	ind), ind);
				2463
				2464	return mr_error_raised;
				2465	}
				2466
				2467	return mr_not_matched;
				2468	}
				2469
				2470	if (status == mr_matched)
				2471	{
				2472	if (sp->m_emits)
				2473	if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
				2474	{
				2475	free_regbyte_ctx_stack (ctx, *rbc);
				2476	barray_destroy (&array);
				2477	return mr_internal_error;
				2478	}
				2479
				2480	if (array)
				2481	if (barray_append (ba, &array))
				2482	{
				2483	free_regbyte_ctx_stack (ctx, *rbc);
				2484	barray_destroy (&array);
				2485	return mr_internal_error;
				2486	}
				2487	}
				2488
				2489	barray_destroy (&array);
				2490
				2491	/* if the rule operator is a logical or, we pick up the first matching specifier */
				2492	if (ru->m_oper == op_or && (status == mr_matched \|\| status == mr_dont_emit))
				2493	{
				2494	*index = ind;
				2495	*rbc = ctx;
				2496	return mr_matched;
				2497	}
				2498
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2499	sp = sp->next;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2500	}
				2501
				2502	/* everything went fine - all specifiers match up */
				2503	if (ru->m_oper == op_and && (status == mr_matched \|\| status == mr_dont_emit))
				2504	{
				2505	*index = ind;
				2506	*rbc = ctx;
				2507	return mr_matched;
				2508	}
				2509
				2510	free_regbyte_ctx_stack (ctx, *rbc);
				2511	return mr_not_matched;
				2512	}
				2513
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2514	static match_result fast_match (dict di, const byte text, unsigned int index, rule ru, int _PP, bytepool _BP,
				2515	int filtering_string, regbyte_ctx **rbc)
				2516	{
				2517	unsigned int ind = *index;
				2518	int _P = filtering_string ? 0 : *_PP;
				2519	int _P2;
				2520	match_result status = mr_not_matched;
				2521	spec *sp = ru->m_specs;
				2522	regbyte_ctx ctx = rbc;
				2523
				2524	/* for every specifier in the rule */
				2525	while (sp)
				2526	{
				2527	unsigned int i, len, save_ind = ind;
				2528
				2529	_P2 = _P + (sp->m_emits ? emit_size (sp->m_emits) : 0);
				2530	if (bytepool_reserve (_BP, _P2))
				2531	{
				2532	free_regbyte_ctx_stack (ctx, *rbc);
				2533	return mr_internal_error;
				2534	}
				2535
				2536	if (satisfies_condition (sp->m_cond, ctx))
				2537	{
				2538	switch (sp->m_spec_type)
				2539	{
				2540	case st_identifier:
				2541	status = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
				2542
				2543	if (status == mr_internal_error)
				2544	{
				2545	free_regbyte_ctx_stack (ctx, *rbc);
				2546	return mr_internal_error;
				2547	}
				2548	break;
				2549	case st_string:
				2550	len = str_length (sp->m_string);
				2551
				2552	/* prefilter the stream */
				2553	if (!filtering_string && di->m_string)
				2554	{
				2555	unsigned int filter_index = 0;
				2556	match_result result;
				2557	regbyte_ctx *null_ctx = NULL;
				2558
				2559	result = fast_match (di, text + ind, &filter_index, di->m_string, NULL, _BP, 1, &null_ctx);
				2560
				2561	if (result == mr_internal_error)
				2562	{
				2563	free_regbyte_ctx_stack (ctx, *rbc);
				2564	return mr_internal_error;
				2565	}
				2566
				2567	if (result != mr_matched)
				2568	{
				2569	status = mr_not_matched;
				2570	break;
				2571	}
				2572
				2573	if (filter_index != len \|\| !str_equal_n (sp->m_string, text + ind, len))
				2574	{
				2575	status = mr_not_matched;
				2576	break;
				2577	}
				2578
				2579	status = mr_matched;
				2580	ind += len;
				2581	}
				2582	else
				2583	{
				2584	status = mr_matched;
				2585	for (i = 0; status == mr_matched && i < len; i++)
				2586	if (text[ind + i] != sp->m_string[i])
				2587	status = mr_not_matched;
				2588
				2589	if (status == mr_matched)
				2590	ind += len;
				2591	}
				2592	break;
				2593	case st_byte:
				2594	status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
				2595	if (status == mr_matched)
				2596	ind++;
				2597	break;
				2598	case st_byte_range:
				2599	status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
				2600	mr_matched : mr_not_matched;
				2601	if (status == mr_matched)
				2602	ind++;
				2603	break;
				2604	case st_true:
				2605	status = mr_matched;
				2606	break;
				2607	case st_false:
				2608	status = mr_not_matched;
				2609	break;
				2610	case st_debug:
				2611	status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
				2612	break;
				2613	case st_identifier_loop:
				2614	status = mr_dont_emit;
				2615	for (;;)
				2616	{
				2617	match_result result;
				2618
				2619	save_ind = ind;
				2620	result = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
				2621
				2622	if (result == mr_error_raised)
				2623	{
				2624	status = result;
				2625	break;
				2626	}
				2627	else if (result == mr_matched)
				2628	{
				2629	if (!filtering_string)
				2630	{
				2631	if (sp->m_emits != NULL)
				2632	{
				2633	if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
				2634	{
				2635	free_regbyte_ctx_stack (ctx, *rbc);
				2636	return mr_internal_error;
				2637	}
				2638	}
				2639
				2640	_P = _P2;
				2641	_P2 += sp->m_emits ? emit_size (sp->m_emits) : 0;
				2642	if (bytepool_reserve (_BP, _P2))
				2643	{
				2644	free_regbyte_ctx_stack (ctx, *rbc);
				2645	return mr_internal_error;
				2646	}
				2647	}
				2648	}
				2649	else if (result == mr_internal_error)
				2650	{
				2651	free_regbyte_ctx_stack (ctx, *rbc);
				2652	return mr_internal_error;
				2653	}
				2654	else
				2655	break;
				2656	}
				2657	break;
				2658	}
				2659	}
				2660	else
				2661	{
				2662	status = mr_not_matched;
				2663	}
				2664
				2665	if (status == mr_error_raised)
				2666	{
				2667	free_regbyte_ctx_stack (ctx, *rbc);
				2668
				2669	return mr_error_raised;
				2670	}
				2671
				2672	if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
				2673	{
				2674	free_regbyte_ctx_stack (ctx, *rbc);
				2675
				2676	if (sp->m_errtext)
				2677	{
				2678	set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
				2679	ind), ind);
				2680
				2681	return mr_error_raised;
				2682	}
				2683
				2684	return mr_not_matched;
				2685	}
				2686
				2687	if (status == mr_matched)
				2688	{
				2689	if (sp->m_emits != NULL)
				2690	if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
				2691	{
				2692	free_regbyte_ctx_stack (ctx, *rbc);
				2693	return mr_internal_error;
				2694	}
				2695
				2696	_P = _P2;
				2697	}
				2698
				2699	/* if the rule operator is a logical or, we pick up the first matching specifier */
				2700	if (ru->m_oper == op_or && (status == mr_matched \|\| status == mr_dont_emit))
				2701	{
				2702	*index = ind;
				2703	*rbc = ctx;
				2704	if (!filtering_string)
				2705	*_PP = _P;
				2706	return mr_matched;
				2707	}
				2708
				2709	sp = sp->next;
				2710	}
				2711
				2712	/* everything went fine - all specifiers match up */
				2713	if (ru->m_oper == op_and && (status == mr_matched \|\| status == mr_dont_emit))
				2714	{
				2715	*index = ind;
				2716	*rbc = ctx;
				2717	if (!filtering_string)
				2718	*_PP = _P;
				2719	return mr_matched;
				2720	}
				2721
				2722	free_regbyte_ctx_stack (ctx, *rbc);
				2723	return mr_not_matched;
				2724	}
				2725
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2726	static byte error_get_token (error er, dict di, const byte text, unsigned int ind)
				2727	{
				2728	byte *str = NULL;
				2729
				2730	if (er->m_token)
				2731	{
				2732	barray *ba;
				2733	unsigned int filter_index = 0;
				2734	regbyte_ctx *ctx = NULL;
				2735
				2736	barray_create (&ba);
				2737	if (ba != NULL)
				2738	{
				2739	if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
				2740	filter_index)
				2741	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	2742	str = (byte *) mem_alloc (filter_index + 1);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2743	if (str != NULL)
				2744	{
				2745	str_copy_n (str, text + ind, filter_index);
				2746	str[filter_index] = '\0';
				2747	}
				2748	}
				2749	barray_destroy (&ba);
				2750	}
				2751	}
				2752
				2753	return str;
				2754	}
				2755
				2756	typedef struct grammar_load_state_
				2757	{
				2758	dict *di;
				2759	byte *syntax_symbol;
				2760	byte *string_symbol;
				2761	map_str *maps;
				2762	map_byte *mapb;
				2763	map_rule *mapr;
				2764	} grammar_load_state;
				2765
				2766	static void grammar_load_state_create (grammar_load_state **gr)
				2767	{
Brian Paul	bdd15b5	2004-05-04 15:11:06 +0000	[diff] [blame]	2768	gr = (grammar_load_state ) mem_alloc (sizeof (grammar_load_state));
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2769	if (*gr)
				2770	{
				2771	(**gr).di = NULL;
				2772	(**gr).syntax_symbol = NULL;
				2773	(**gr).string_symbol = NULL;
				2774	(**gr).maps = NULL;
				2775	(**gr).mapb = NULL;
				2776	(**gr).mapr = NULL;
				2777	}
				2778	}
				2779
				2780	static void grammar_load_state_destroy (grammar_load_state **gr)
				2781	{
				2782	if (*gr)
				2783	{
				2784	dict_destroy (&(**gr).di);
				2785	mem_free ((void ) &(gr).syntax_symbol);
				2786	mem_free ((void ) &(gr).string_symbol);
				2787	map_str_destroy (&(**gr).maps);
				2788	map_byte_destroy (&(**gr).mapb);
				2789	map_rule_destroy (&(**gr).mapr);
				2790	mem_free ((void **) gr);
				2791	}
				2792	}
				2793
				2794	/*
				2795	the API
				2796	*/
				2797
				2798	grammar grammar_load_from_text (const byte *text)
				2799	{
				2800	grammar_load_state *g = NULL;
				2801	grammar id = 0;
				2802
				2803	clear_last_error ();
				2804
				2805	grammar_load_state_create (&g);
				2806	if (g == NULL)
				2807	return 0;
				2808
				2809	dict_create (&g->di);
				2810	if (g->di == NULL)
				2811	{
				2812	grammar_load_state_destroy (&g);
				2813	return 0;
				2814	}
				2815
				2816	eat_spaces (&text);
				2817
				2818	/* skip ".syntax" keyword */
				2819	text += 7;
				2820	eat_spaces (&text);
				2821
				2822	/* retrieve root symbol */
				2823	if (get_identifier (&text, &g->syntax_symbol))
				2824	{
				2825	grammar_load_state_destroy (&g);
				2826	return 0;
				2827	}
				2828	eat_spaces (&text);
				2829
				2830	/* skip semicolon */
				2831	text++;
				2832	eat_spaces (&text);
				2833
				2834	while (*text)
				2835	{
				2836	byte *symbol = NULL;
				2837	int is_dot = *text == '.';
				2838
				2839	if (is_dot)
				2840	text++;
				2841
				2842	if (get_identifier (&text, &symbol))
				2843	{
				2844	grammar_load_state_destroy (&g);
				2845	return 0;
				2846	}
				2847	eat_spaces (&text);
				2848
				2849	/* .emtcode */
				2850	if (is_dot && str_equal (symbol, (byte *) "emtcode"))
				2851	{
				2852	map_byte *ma = NULL;
				2853
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	2854	mem_free ((void *) (void ) &symbol);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2855
				2856	if (get_emtcode (&text, &ma))
				2857	{
				2858	grammar_load_state_destroy (&g);
				2859	return 0;
				2860	}
				2861
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2862	map_byte_append (&g->mapb, ma);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2863	}
				2864	/* .regbyte */
				2865	else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
				2866	{
				2867	map_byte *ma = NULL;
				2868
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	2869	mem_free ((void *) (void ) &symbol);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2870
				2871	if (get_regbyte (&text, &ma))
				2872	{
				2873	grammar_load_state_destroy (&g);
				2874	return 0;
				2875	}
				2876
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2877	map_byte_append (&g->di->m_regbytes, ma);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2878	}
				2879	/* .errtext */
				2880	else if (is_dot && str_equal (symbol, (byte *) "errtext"))
				2881	{
				2882	map_str *ma = NULL;
				2883
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	2884	mem_free ((void *) (void ) &symbol);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2885
				2886	if (get_errtext (&text, &ma))
				2887	{
				2888	grammar_load_state_destroy (&g);
				2889	return 0;
				2890	}
				2891
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2892	map_str_append (&g->maps, ma);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2893	}
				2894	/* .string */
				2895	else if (is_dot && str_equal (symbol, (byte *) "string"))
				2896	{
Brian Paul	a6c423d	2004-08-25 15:59:48 +0000	[diff] [blame]	2897	mem_free ((void *) (void ) &symbol);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2898
				2899	if (g->di->m_string != NULL)
				2900	{
				2901	grammar_load_state_destroy (&g);
				2902	return 0;
				2903	}
				2904
				2905	if (get_identifier (&text, &g->string_symbol))
				2906	{
				2907	grammar_load_state_destroy (&g);
				2908	return 0;
				2909	}
				2910
				2911	/* skip semicolon */
				2912	eat_spaces (&text);
				2913	text++;
				2914	eat_spaces (&text);
				2915	}
				2916	else
				2917	{
				2918	rule *ru = NULL;
				2919	map_rule *ma = NULL;
				2920
				2921	if (get_rule (&text, &ru, g->maps, g->mapb))
				2922	{
				2923	grammar_load_state_destroy (&g);
				2924	return 0;
				2925	}
				2926
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2927	rule_append (&g->di->m_rulez, ru);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2928
				2929	/* if a rule consist of only one specifier, give it an ".and" operator */
				2930	if (ru->m_oper == op_none)
				2931	ru->m_oper = op_and;
				2932
				2933	map_rule_create (&ma);
				2934	if (ma == NULL)
				2935	{
				2936	grammar_load_state_destroy (&g);
				2937	return 0;
				2938	}
				2939
				2940	ma->key = symbol;
				2941	ma->data = ru;
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2942	map_rule_append (&g->mapr, ma);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2943	}
				2944	}
				2945
				2946	if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
				2947	g->di->m_regbytes))
				2948	{
				2949	grammar_load_state_destroy (&g);
				2950	return 0;
				2951	}
				2952
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2953	dict_append (&g_dicts, g->di);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2954	id = g->di->m_id;
				2955	g->di = NULL;
				2956
				2957	grammar_load_state_destroy (&g);
				2958
				2959	return id;
				2960	}
				2961
				2962	int grammar_set_reg8 (grammar id, const byte *name, byte value)
				2963	{
				2964	dict *di = NULL;
				2965	map_byte *reg = NULL;
				2966
				2967	clear_last_error ();
				2968
				2969	dict_find (&g_dicts, id, &di);
				2970	if (di == NULL)
				2971	{
				2972	set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
				2973	return 0;
				2974	}
				2975
				2976	reg = map_byte_locate (&di->m_regbytes, name);
				2977	if (reg == NULL)
				2978	{
				2979	set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
				2980	return 0;
				2981	}
				2982
				2983	reg->data = value;
				2984	return 1;
				2985	}
				2986
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	2987	/*
				2988	internal checking function used by both grammar_check and grammar_fast_check functions
				2989	*/
				2990	static int _grammar_check (grammar id, const byte text, byte prod, unsigned int size,
				2991	unsigned int estimate_prod_size, int use_fast_path)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2992	{
				2993	dict *di = NULL;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2994	unsigned int index = 0;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	2995
				2996	clear_last_error ();
				2997
				2998	dict_find (&g_dicts, id, &di);
				2999	if (di == NULL)
				3000	{
				3001	set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
				3002	return 0;
				3003	}
				3004
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3005	*prod = NULL;
				3006	*size = 0;
				3007
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3008	if (use_fast_path)
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3009	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3010	regbyte_ctx *rbc = NULL;
				3011	bytepool *bp = NULL;
				3012	int _P = 0;
				3013
				3014	bytepool_create (&bp, estimate_prod_size);
				3015	if (bp == NULL)
				3016	return 0;
				3017
				3018	if (fast_match (di, text, &index, di->m_syntax, &_P, bp, 0, &rbc) != mr_matched)
				3019	{
				3020	bytepool_destroy (&bp);
				3021	free_regbyte_ctx_stack (rbc, NULL);
				3022	return 0;
				3023	}
				3024
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3025	free_regbyte_ctx_stack (rbc, NULL);
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3026
				3027	*prod = bp->_F;
				3028	*size = _P;
				3029	bp->_F = NULL;
				3030	bytepool_destroy (&bp);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3031	}
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3032	else
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3033	{
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3034	regbyte_ctx *rbc = NULL;
				3035	barray *ba = NULL;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3036
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3037	barray_create (&ba);
				3038	if (ba == NULL)
				3039	return 0;
				3040
				3041	if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
				3042	{
				3043	barray_destroy (&ba);
				3044	free_regbyte_ctx_stack (rbc, NULL);
				3045	return 0;
				3046	}
				3047
				3048	free_regbyte_ctx_stack (rbc, NULL);
				3049
				3050	prod = (byte ) mem_alloc (ba->len * sizeof (byte));
				3051	if (*prod == NULL)
				3052	{
				3053	barray_destroy (&ba);
				3054	return 0;
				3055	}
				3056
				3057	mem_copy (prod, ba->data, ba->len sizeof (byte));
				3058	*size = ba->len;
				3059	barray_destroy (&ba);
				3060	}
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3061
				3062	return 1;
				3063	}
				3064
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3065	int grammar_check (grammar id, const byte text, byte prod, unsigned int size)
				3066	{
				3067	return _grammar_check (id, text, prod, size, 0, 0);
				3068	}
				3069
				3070	int grammar_fast_check (grammar id, const byte text, byte prod, unsigned int size,
				3071	unsigned int estimate_prod_size)
				3072	{
				3073	return _grammar_check (id, text, prod, size, estimate_prod_size, 1);
				3074	}
				3075
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3076	int grammar_destroy (grammar id)
				3077	{
				3078	dict **di = &g_dicts;
				3079
				3080	clear_last_error ();
				3081
				3082	while (*di != NULL)
				3083	{
				3084	if ((**di).m_id == id)
				3085	{
				3086	dict tmp = di;
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3087	di = (*di).next;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3088	dict_destroy (&tmp);
				3089	return 1;
				3090	}
				3091
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3092	di = &(**di).next;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3093	}
				3094
				3095	set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
				3096	return 0;
				3097	}
				3098
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3099	static void append_character (const char x, byte text, int dots_made, int *len, int size)
				3100	{
				3101	if (*dots_made == 0)
				3102	{
				3103	if (*len < size - 1)
				3104	{
				3105	text[(*len)++] = x;
				3106	text[*len] = '\0';
				3107	}
				3108	else
				3109	{
				3110	int i;
				3111	for (i = 0; i < 3; i++)
				3112	if (--(*len) >= 0)
				3113	text[*len] = '.';
				3114	*dots_made = 1;
				3115	}
				3116	}
				3117	}
				3118
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3119	void grammar_get_last_error (byte text, unsigned int size, int pos)
				3120	{
Brian Paul	bd997cd	2004-07-20 21:12:56 +0000	[diff] [blame]	3121	int len = 0, dots_made = 0;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3122	const byte *p = error_message;
				3123
				3124	*text = '\0';
				3125
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3126	if (p)
				3127	{
				3128	while (*p)
				3129	{
				3130	if (*p == '$')
				3131	{
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3132	const byte *r = error_param;
				3133
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3134	while (*r)
				3135	{
				3136	append_character (*r++, text, &dots_made, &len, (int) size);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3137	}
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3138
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3139	p++;
				3140	}
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3141	else
				3142	{
				3143	append_character (*p++, text, &dots_made, &len, size);
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3144	}
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3145	}
Brian Paul	289ffee	2004-10-02 15:56:50 +0000	[diff] [blame]	3146	}
Michal Krol	904ef74	2004-10-20 14:54:17 +0000	[diff] [blame^]	3147
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3148	*pos = error_position;
Michal Krol	a904b49	2004-03-04 13:07:52 +0000	[diff] [blame]	3149	}