blob: 7f2ee42d21d7ba06ec26da8cbf955eff1822e8d5 [file] [log] [blame]
Michal Krola904b492004-03-04 13:07:52 +00001/*
2 * Mesa 3-D graphics library
Michal Krola79d4e72006-10-19 08:07:00 +00003 * Version: 6.6
Michal Krola904b492004-03-04 13:07:52 +00004 *
Michal Krola79d4e72006-10-19 08:07:00 +00005 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
Michal Krola904b492004-03-04 13:07:52 +00006 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
26 * \file grammar.c
27 * syntax parsing engine
28 * \author Michal Krol
29 */
30
31#ifndef GRAMMAR_PORT_BUILD
32#error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
33#endif
34
35/*
Michal Krola904b492004-03-04 13:07:52 +000036*/
37
38/*
39 INTRODUCTION
40 ------------
41
42 The task is to check the syntax of an input string. Input string is a stream of ASCII
43 characters terminated with a null-character ('\0'). Checking it using C language is
44 difficult and hard to implement without bugs. It is hard to maintain and make changes when
45 the syntax changes.
46
47 This is because of a high redundancy of the C code. Large blocks of code are duplicated with
48 only small changes. Even use of macros does not solve the problem because macros cannot
49 erase the complexity of the problem.
50
51 The resolution is to create a new language that will be highly oriented to our task. Once
52 we describe a particular syntax, we are done. We can then focus on the code that implements
    the language. Its size and complexity are relatively small compared to the code that directly
54 checks the syntax.
55
56 First, we must implement our new language. Here, the language is implemented in C, but it
57 could also be implemented in any other language. The code is listed below. We must take
58 a good care that it is bug free. This is simple because the code is simple and clean.
59
60 Next, we must describe the syntax of our new language in itself. Once created and checked
    manually that it is correct, we can use it to check other scripts.
62
63 Note that our new language loading code does not have to check the syntax. It is because we
64 assume that the script describing itself is correct, and other scripts can be syntactically
65 checked by the former script. The loading code must only do semantic checking which leads us to
66 simple resolving references.
67
68 THE LANGUAGE
69 ------------
70
71 Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
72 sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
73 which is an identifier, and its definition. A definition is in turn a sequence of specifiers
74 connected with ".and" or ".or" operator. These operators cannot be mixed together in a one
75 definition. Specifier can be a symbol, string, character, character range or a special
76 keyword ".true" or ".false".
77
78 On the very beginning of the script there is a declaration of a root symbol and is in the form:
79 .syntax <root_symbol>;
    The <root_symbol> must be one of the symbols in declaration sequence. The syntax is correct if
81 the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
82 the symbol evaluates to true. Definition evaluation depends on the operator used to connect
83 specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
    only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
85 true if any of the specifiers evaluates to true. If definition contains only one specifier,
86 it is evaluated as if it was connected with ".true" keyword by ".and" operator.
87
88 If specifier is a ".true" keyword, it always evaluates to true.
89
90 If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
91 when it does not evaluate to true.
92
93 Character range specifier is in the form:
94 '<first_character>' - '<second_character>'
95 If specifier is a character range, it evaluates to true if character in the stream is greater
96 or equal to <first_character> and less or equal to <second_character>. In that situation
97 the stream pointer is advanced to point to next character in the stream. All C-style escape
    sequences are supported although trigraph sequences are not. The comparisons are performed
99 on 8-bit unsigned integers.
100
101 Character specifier is in the form:
102 '<single_character>'
103 It evaluates to true if the following character range specifier evaluates to true:
104 '<single_character>' - '<single_character>'
105
106 String specifier is in the form:
107 "<string>"
108 Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
109 <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
110 the following character specifier evaluates to true:
111 '<string>[i]'
112 If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
113
114 Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
115 .loop <symbol> (1)
116 where <symbol> is defined as follows:
117 <symbol> <definition>; (2)
118 Construction (1) is replaced by the following code:
119 <symbol$1>
120 and declaration (2) is replaced by the following:
121 <symbol$1> <symbol$2> .or .true;
122 <symbol$2> <symbol> .and <symbol$1>;
123 <symbol> <definition>;
124
    Synek also supports a register mechanism. The user can, in their SYN file, declare a number of
126 registers that can be accessed in the syn body. Each reg has its name and a default value.
127 The register is one byte wide. The C code can change the default value by calling
128 grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
129 a sequence of specifiers joined with .and or .or operator. And now each specifier can be
130 prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
131 where <operator> can be == or !=. If the condition evaluates to false, the specifier
    evaluates to .false. Otherwise it evaluates to the specifier.
133
134 ESCAPE SEQUENCES
135 ----------------
136
137 Synek supports all escape sequences in character specifiers. The mapping table is listed below.
    All occurrences of the characters in the first column are replaced with the corresponding
139 character in the second column.
140
141 Escape sequence Represents
142 ------------------------------------------------------------------------------------------------
143 \a Bell (alert)
144 \b Backspace
145 \f Formfeed
146 \n New line
147 \r Carriage return
148 \t Horizontal tab
149 \v Vertical tab
150 \' Single quotation mark
151 \" Double quotation mark
152 \\ Backslash
153 \? Literal question mark
154 \ooo ASCII character in octal notation
155 \xhhh ASCII character in hexadecimal notation
156 ------------------------------------------------------------------------------------------------
157
158 RAISING ERRORS
159 --------------
160
161 Any specifier can be followed by a special construction that is executed when the specifier
162 evaluates to false. The construction is in the form:
163 .error <ERROR_TEXT>
164 <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
165 in the form:
166 .errtext <ERROR_TEXT> "<error_desc>"
167 When specifier evaluates to false and this construction is present, parsing is stopped
168 immediately and <error_desc> is returned as a result of parsing. The error position is also
    returned and it is meant as an offset from the beginning of the stream to the character that
170 was valid so far. Example:
171
172 (**** syntax script ****)
173
174 .syntax program;
175 .errtext MISSING_SEMICOLON "missing ';'"
176 program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
177 .loop space .and '\0';
178 declaration "declare" .and .loop space .and identifier;
179 space ' ';
180
181 (**** sample code ****)
182
183 declare foo ,
184
185 In the example above checking the sample code will result in error message "missing ';'" and
186 error position 12. The sample code is not correct. Note the presence of '\0' specifier to
187 assure that there is no code after semicolon - only spaces.
188 <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
189 the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
    the identifier name. The starting position is the error position. The length of the resulting
191 string is the position after invoking the symbol.
192
193 PRODUCTION
194 ----------
195
196 Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
197 that evaluate to true. That is, every specifier and optional error construction can be followed
198 by a number of emit constructions that are in the form:
199 .emit <parameter>
    <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
201 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
202 in the form:
203 .emtcode <identifier> <hex_number>
204
205 When given specifier evaluates to true, all emits associated with the specifier are output
206 in order they were declared. A star means that last-read character should be output instead
207 of constant value. Example:
208
209 (**** syntax script ****)
210
211 .syntax foobar;
212 .emtcode WORD_FOO 0x01
213 .emtcode WORD_BAR 0x02
214 foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
215 FOO "foo" .and SPACE;
216 BAR "bar" .and SPACE;
217 SPACE ' ' .or '\0';
218
219 (**** sample text 1 ****)
220
221 foo
222
223 (**** sample text 2 ****)
224
225 foobar
226
227 For both samples the result will be one-element array. For first sample text it will be
228 value 1, for second - 0. Note that every text will be accepted because of presence of
229 .true as an alternative.
230
231 Another example:
232
233 (**** syntax script ****)
234
235 .syntax declaration;
236 .emtcode VARIABLE 0x01
237 declaration "declare" .and .loop space .and
238 identifier .emit VARIABLE .and (1)
239 .true .emit 0x00 .and (2)
240 .loop space .and ';';
241 space ' ' .or '\t';
242 identifier .loop id_char .emit *; (3)
243 id_char 'a'-'z' .or 'A'-'Z' .or '_';
244
245 (**** sample code ****)
246
247 declare fubar;
248
249 In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
250 true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
251 to terminate the string with null to signal when the string ends. Specifier (3) outputs
252 all characters that make declared identifier. The result of sample code will be the
253 following array:
254 { 1, 'f', 'u', 'b', 'a', 'r', 0 }
255
256 If .emit is followed by dollar $, it means that current position should be output. Current
257 position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
258 first character consumed by the specifier associated with the .emit instruction. Current
259 position is stored in the output buffer in Little-Endian convention (the lowest byte comes
260 first).
261*/
262
263static void mem_free (void **);
264
265/*
266 internal error messages
267*/
268static const byte *OUT_OF_MEMORY = (byte *) "internal error 1001: out of physical memory";
269static const byte *UNRESOLVED_REFERENCE = (byte *) "internal error 1002: unresolved reference '$'";
270static const byte *INVALID_GRAMMAR_ID = (byte *) "internal error 1003: invalid grammar object";
271static const byte *INVALID_REGISTER_NAME = (byte *) "internal error 1004: invalid register name: '$'";
Brian Paul8e8fa632005-07-01 02:03:33 +0000272/*static const byte *DUPLICATE_IDENTIFIER = (byte *) "internal error 1005: identifier '$' already defined";*/
Michal Krol904ef742004-10-20 14:54:17 +0000273static const byte *UNREFERENCED_IDENTIFIER =(byte *) "internal error 1006: unreferenced identifier '$'";
Michal Krola904b492004-03-04 13:07:52 +0000274
Michal Krol904ef742004-10-20 14:54:17 +0000275static const byte *error_message = NULL; /* points to one of the error messages above */
Michal Krola904b492004-03-04 13:07:52 +0000276static byte *error_param = NULL; /* this is inserted into error_message in place of $ */
277static int error_position = -1;
278
279static byte *unknown = (byte *) "???";
280
Alan Hourihanec63f3cf2004-12-08 14:00:46 +0000281static void clear_last_error (void)
Michal Krola904b492004-03-04 13:07:52 +0000282{
283 /* reset error message */
284 error_message = NULL;
285
286 /* free error parameter - if error_param is a "???" don't free it - it's static */
287 if (error_param != unknown)
Brian Paula6c423d2004-08-25 15:59:48 +0000288 mem_free ((void **) (void *) &error_param);
Michal Krola904b492004-03-04 13:07:52 +0000289 else
290 error_param = NULL;
291
292 /* reset error position */
293 error_position = -1;
294}
295
296static void set_last_error (const byte *msg, byte *param, int pos)
297{
Michal Krol904ef742004-10-20 14:54:17 +0000298 /* error message can be set only once */
Michal Krola904b492004-03-04 13:07:52 +0000299 if (error_message != NULL)
300 {
Brian Paula6c423d2004-08-25 15:59:48 +0000301 mem_free ((void **) (void *) &param);
Michal Krola904b492004-03-04 13:07:52 +0000302 return;
303 }
304
305 error_message = msg;
306
Michal Krol904ef742004-10-20 14:54:17 +0000307 /* if param is NULL, set error_param to unknown ("???") */
308 /* note: do not try to strdup the "???" - it may be that we are here because of */
309 /* out of memory error so strdup can fail */
Michal Krola904b492004-03-04 13:07:52 +0000310 if (param != NULL)
311 error_param = param;
312 else
313 error_param = unknown;
314
315 error_position = pos;
316}
317
318/*
319 memory management routines
320*/
321static void *mem_alloc (size_t size)
322{
323 void *ptr = grammar_alloc_malloc (size);
324 if (ptr == NULL)
325 set_last_error (OUT_OF_MEMORY, NULL, -1);
326 return ptr;
327}
328
/*
    Copies size bytes from src to dst by delegating to grammar_memory_copy()
    and returns whatever that routine returns.
*/
static void *mem_copy (void *dst, const void *src, size_t size)
{
    void *result = grammar_memory_copy (dst, src, size);

    return result;
}
333
/*
    Releases the block *ptr through grammar_alloc_free() and leaves *ptr set to NULL,
    so the caller's pointer cannot dangle.
*/
static void mem_free (void **ptr)
{
    void *block = *ptr;

    *ptr = NULL;
    grammar_alloc_free (block);
}
339
340static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)
341{
342 void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
343 if (ptr2 == NULL)
344 set_last_error (OUT_OF_MEMORY, NULL, -1);
345 return ptr2;
346}
347
348static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)
349{
350 return grammar_string_copy_n (dst, src, max_len);
351}
352
353static byte *str_duplicate (const byte *str)
354{
355 byte *new_str = grammar_string_duplicate (str);
356 if (new_str == NULL)
357 set_last_error (OUT_OF_MEMORY, NULL, -1);
358 return new_str;
359}
360
361static int str_equal (const byte *str1, const byte *str2)
362{
363 return grammar_string_compare (str1, str2) == 0;
364}
365
366static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)
367{
368 return grammar_string_compare_n (str1, str2, n) == 0;
369}
370
Michal Krola79d4e72006-10-19 08:07:00 +0000371static int
372str_length (const byte *str)
Michal Krola904b492004-03-04 13:07:52 +0000373{
Michal Krola79d4e72006-10-19 08:07:00 +0000374 return (int) (grammar_string_length (str));
Michal Krola904b492004-03-04 13:07:52 +0000375}
376
377/*
Michal Krol904ef742004-10-20 14:54:17 +0000378 useful macros
379*/
/*
    Expands to the definition of
        static void <list_type>_append (<list_type> **list, <list_type> *item)
    which walks the singly-linked list starting at *list through the "next" members and
    stores item in the first NULL link it finds (in *list itself when the list is empty).
    list_type must be a struct type that has a "next" pointer member.
*/
#define GRAMMAR_IMPLEMENT_LIST_APPEND(list_type)\
    static void list_type##_append (list_type **list, list_type *item)\
    {\
        list_type **link;\
        for (link = list; *link != NULL; link = &(*link)->next)\
            ;\
        *link = item;\
    }
385
386/*
Michal Krola904b492004-03-04 13:07:52 +0000387 string to byte map typedef
388*/
389typedef struct map_byte_
390{
391 byte *key;
392 byte data;
393 struct map_byte_ *next;
394} map_byte;
395
396static void map_byte_create (map_byte **ma)
397{
Brian Paulbdd15b52004-05-04 15:11:06 +0000398 *ma = (map_byte *) mem_alloc (sizeof (map_byte));
Michal Krola904b492004-03-04 13:07:52 +0000399 if (*ma)
400 {
401 (**ma).key = NULL;
402 (**ma).data = '\0';
403 (**ma).next = NULL;
404 }
405}
406
Michal Krola904b492004-03-04 13:07:52 +0000407static void map_byte_destroy (map_byte **ma)
408{
409 if (*ma)
410 {
411 map_byte_destroy (&(**ma).next);
412 mem_free ((void **) &(**ma).key);
413 mem_free ((void **) ma);
414 }
415}
416
/* defines map_byte_append (map_byte **, map_byte *): links an entry at the end of a map_byte list */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte)
Michal Krola904b492004-03-04 13:07:52 +0000418
419/*
420 searches the map for the specified key,
421 returns pointer to the element with the specified key if it exists
422 returns NULL otherwise
423*/
Brian Paul788461f2004-03-29 14:53:49 +0000424static map_byte *map_byte_locate (map_byte **ma, const byte *key)
Michal Krola904b492004-03-04 13:07:52 +0000425{
426 while (*ma)
427 {
428 if (str_equal ((**ma).key, key))
429 return *ma;
430
431 ma = &(**ma).next;
432 }
433
434 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
435 return NULL;
436}
437
438/*
439 searches the map for specified key,
440 if the key is matched, *data is filled with data associated with the key,
441 returns 0 if the key is matched,
442 returns 1 otherwise
443*/
444static int map_byte_find (map_byte **ma, const byte *key, byte *data)
445{
446 map_byte *found = map_byte_locate (ma, key);
447 if (found != NULL)
448 {
449 *data = found->data;
450
451 return 0;
452 }
453
454 return 1;
455}
456
457/*
458 regbyte context typedef
459
460 Each regbyte consists of its name and a default value. These are static and created at
461 grammar script compile-time, for example the following line:
462 .regbyte vertex_blend 0x00
463 adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
464 When the script is executed, this regbyte can be accessed by name for read and write. When a
465 particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
    stack. The new entry contains information about which regbyte it references and its new value.
467 When a given regbyte is accessed for read, the stack is searched top-down to find an
468 entry that references the regbyte. The first matching entry is used to return the current
469 value it holds. If no entry is found, the default value is returned.
470*/
471typedef struct regbyte_ctx_
472{
473 map_byte *m_regbyte;
474 byte m_current_value;
475 struct regbyte_ctx_ *m_prev;
476} regbyte_ctx;
477
478static void regbyte_ctx_create (regbyte_ctx **re)
479{
Brian Paulbdd15b52004-05-04 15:11:06 +0000480 *re = (regbyte_ctx *) mem_alloc (sizeof (regbyte_ctx));
Michal Krola904b492004-03-04 13:07:52 +0000481 if (*re)
482 {
483 (**re).m_regbyte = NULL;
484 (**re).m_prev = NULL;
485 }
486}
487
488static void regbyte_ctx_destroy (regbyte_ctx **re)
489{
490 if (*re)
491 {
492 mem_free ((void **) re);
493 }
494}
495
496static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)
497{
498 /* first lookup in the register stack */
499 while (*re != NULL)
500 {
501 if ((**re).m_regbyte == reg)
502 return (**re).m_current_value;
503
504 re = &(**re).m_prev;
505 }
506
507 /* if not found - return the default value */
508 return reg->data;
509}
510
511/*
512 emit type typedef
513*/
/* what an emit instruction produces */
enum emit_type_
{
    et_byte = 0,        /* explicit number */
    et_stream = 1,      /* eaten character */
    et_position = 2     /* current position */
};

typedef enum emit_type_ emit_type;
520
521/*
522 emit destination typedef
523*/
/* where an emit instruction delivers its value */
enum emit_dest_
{
    ed_output = 0,      /* write to the output buffer */
    ed_regbyte = 1      /* write a particular regbyte */
};

typedef enum emit_dest_ emit_dest;
529
530/*
531 emit typedef
532*/
533typedef struct emit_
534{
535 emit_dest m_emit_dest;
536 emit_type m_emit_type; /* ed_output */
537 byte m_byte; /* et_byte */
538 map_byte *m_regbyte; /* ed_regbyte */
539 byte *m_regname; /* ed_regbyte - temporary */
540 struct emit_ *m_next;
541} emit;
542
543static void emit_create (emit **em)
544{
Brian Paulbdd15b52004-05-04 15:11:06 +0000545 *em = (emit *) mem_alloc (sizeof (emit));
Michal Krola904b492004-03-04 13:07:52 +0000546 if (*em)
547 {
548 (**em).m_emit_dest = ed_output;
549 (**em).m_emit_type = et_byte;
550 (**em).m_byte = '\0';
551 (**em).m_regbyte = NULL;
552 (**em).m_regname = NULL;
553 (**em).m_next = NULL;
554 }
555}
556
557static void emit_destroy (emit **em)
558{
559 if (*em)
560 {
561 emit_destroy (&(**em).m_next);
562 mem_free ((void **) &(**em).m_regname);
563 mem_free ((void **) em);
564 }
565}
566
Michal Krol904ef742004-10-20 14:54:17 +0000567static unsigned int emit_size (emit *_E)
568{
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000569 unsigned int n = 0;
Michal Krol904ef742004-10-20 14:54:17 +0000570
571 while (_E != NULL)
572 {
573 if (_E->m_emit_dest == ed_output)
574 {
575 if (_E->m_emit_type == et_position)
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000576 n += 4; /* position is a 32-bit unsigned integer */
Michal Krol904ef742004-10-20 14:54:17 +0000577 else
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000578 n++;
Michal Krol904ef742004-10-20 14:54:17 +0000579 }
580 _E = _E->m_next;
581 }
582
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000583 return n;
Michal Krol904ef742004-10-20 14:54:17 +0000584}
585
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000586static int emit_push (emit *_E, byte *_P, byte c, unsigned int _Pos, regbyte_ctx **_Ctx)
Michal Krol904ef742004-10-20 14:54:17 +0000587{
588 while (_E != NULL)
589 {
590 if (_E->m_emit_dest == ed_output)
591 {
592 if (_E->m_emit_type == et_byte)
593 *_P++ = _E->m_byte;
594 else if (_E->m_emit_type == et_stream)
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000595 *_P++ = c;
Michal Krol904ef742004-10-20 14:54:17 +0000596 else /* _Em->type == et_position */
597 {
598 *_P++ = (byte) (_Pos);
599 *_P++ = (byte) (_Pos >> 8);
600 *_P++ = (byte) (_Pos >> 16);
601 *_P++ = (byte) (_Pos >> 24);
602 }
603 }
604 else
605 {
606 regbyte_ctx *new_rbc;
607 regbyte_ctx_create (&new_rbc);
608 if (new_rbc == NULL)
609 return 1;
610
611 new_rbc->m_prev = *_Ctx;
612 new_rbc->m_regbyte = _E->m_regbyte;
613 *_Ctx = new_rbc;
614
615 if (_E->m_emit_type == et_byte)
616 new_rbc->m_current_value = _E->m_byte;
617 else if (_E->m_emit_type == et_stream)
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000618 new_rbc->m_current_value = c;
Michal Krol904ef742004-10-20 14:54:17 +0000619 }
620
621 _E = _E->m_next;
622 }
623
624 return 0;
625}
626
Michal Krola904b492004-03-04 13:07:52 +0000627/*
628 error typedef
629*/
630typedef struct error_
631{
632 byte *m_text;
633 byte *m_token_name;
634 struct rule_ *m_token;
635} error;
636
637static void error_create (error **er)
638{
Brian Paulbdd15b52004-05-04 15:11:06 +0000639 *er = (error *) mem_alloc (sizeof (error));
Michal Krola904b492004-03-04 13:07:52 +0000640 if (*er)
641 {
642 (**er).m_text = NULL;
643 (**er).m_token_name = NULL;
644 (**er).m_token = NULL;
645 }
646}
647
648static void error_destroy (error **er)
649{
650 if (*er)
651 {
652 mem_free ((void **) &(**er).m_text);
653 mem_free ((void **) &(**er).m_token_name);
654 mem_free ((void **) er);
655 }
656}
657
658struct dict_;
Michal Krola79d4e72006-10-19 08:07:00 +0000659
660static byte *
661error_get_token (error *, struct dict_ *, const byte *, int);
Michal Krola904b492004-03-04 13:07:52 +0000662
663/*
664 condition operand type typedef
665*/
/* kind of a condition operand */
enum cond_oper_type_
{
    cot_byte = 0,       /* constant 8-bit unsigned integer */
    cot_regbyte = 1     /* pointer to byte register containing the current value */
};

typedef enum cond_oper_type_ cond_oper_type;
671
672/*
673 condition operand typedef
674*/
675typedef struct cond_oper_
676{
677 cond_oper_type m_type;
678 byte m_byte; /* cot_byte */
679 map_byte *m_regbyte; /* cot_regbyte */
680 byte *m_regname; /* cot_regbyte - temporary */
681} cond_oper;
682
683/*
684 condition type typedef
685*/
/* comparison performed by a condition */
enum cond_type_
{
    ct_equal = 0,
    ct_not_equal = 1
};

typedef enum cond_type_ cond_type;
691
692/*
693 condition typedef
694*/
695typedef struct cond_
696{
697 cond_type m_type;
698 cond_oper m_operands[2];
699} cond;
700
701static void cond_create (cond **co)
702{
Brian Paulbdd15b52004-05-04 15:11:06 +0000703 *co = (cond *) mem_alloc (sizeof (cond));
Michal Krola904b492004-03-04 13:07:52 +0000704 if (*co)
705 {
706 (**co).m_operands[0].m_regname = NULL;
707 (**co).m_operands[1].m_regname = NULL;
708 }
709}
710
711static void cond_destroy (cond **co)
712{
713 if (*co)
714 {
715 mem_free ((void **) &(**co).m_operands[0].m_regname);
716 mem_free ((void **) &(**co).m_operands[1].m_regname);
717 mem_free ((void **) co);
718 }
719}
720
721/*
722 specifier type typedef
723*/
/* kind of a specifier */
enum spec_type_
{
    st_false = 0,
    st_true = 1,
    st_byte = 2,
    st_byte_range = 3,
    st_string = 4,
    st_identifier = 5,
    st_identifier_loop = 6,
    st_debug = 7
};

typedef enum spec_type_ spec_type;
735
736/*
737 specifier typedef
738*/
739typedef struct spec_
740{
741 spec_type m_spec_type;
742 byte m_byte[2]; /* st_byte, st_byte_range */
743 byte *m_string; /* st_string */
744 struct rule_ *m_rule; /* st_identifier, st_identifier_loop */
745 emit *m_emits;
746 error *m_errtext;
747 cond *m_cond;
Michal Krol904ef742004-10-20 14:54:17 +0000748 struct spec_ *next;
Michal Krola904b492004-03-04 13:07:52 +0000749} spec;
750
751static void spec_create (spec **sp)
752{
Brian Paulbdd15b52004-05-04 15:11:06 +0000753 *sp = (spec *) mem_alloc (sizeof (spec));
Michal Krola904b492004-03-04 13:07:52 +0000754 if (*sp)
755 {
756 (**sp).m_spec_type = st_false;
757 (**sp).m_byte[0] = '\0';
758 (**sp).m_byte[1] = '\0';
759 (**sp).m_string = NULL;
760 (**sp).m_rule = NULL;
761 (**sp).m_emits = NULL;
762 (**sp).m_errtext = NULL;
763 (**sp).m_cond = NULL;
Michal Krol904ef742004-10-20 14:54:17 +0000764 (**sp).next = NULL;
Michal Krola904b492004-03-04 13:07:52 +0000765 }
766}
767
768static void spec_destroy (spec **sp)
769{
770 if (*sp)
771 {
Michal Krol904ef742004-10-20 14:54:17 +0000772 spec_destroy (&(**sp).next);
Michal Krola904b492004-03-04 13:07:52 +0000773 emit_destroy (&(**sp).m_emits);
774 error_destroy (&(**sp).m_errtext);
775 mem_free ((void **) &(**sp).m_string);
776 cond_destroy (&(**sp).m_cond);
777 mem_free ((void **) sp);
778 }
779}
780
/* defines spec_append (spec **, spec *): links a specifier at the end of a spec list */
GRAMMAR_IMPLEMENT_LIST_APPEND(spec)
Michal Krola904b492004-03-04 13:07:52 +0000782
783/*
784 operator typedef
785*/
/* operator that joins the specifiers of a rule */
enum oper_
{
    op_none = 0,
    op_and = 1,
    op_or = 2
};

typedef enum oper_ oper;
792
793/*
794 rule typedef
795*/
796typedef struct rule_
797{
798 oper m_oper;
799 spec *m_specs;
Michal Krol904ef742004-10-20 14:54:17 +0000800 struct rule_ *next;
801 int m_referenced;
Michal Krola904b492004-03-04 13:07:52 +0000802} rule;
803
804static void rule_create (rule **ru)
805{
Brian Paulbdd15b52004-05-04 15:11:06 +0000806 *ru = (rule *) mem_alloc (sizeof (rule));
Michal Krola904b492004-03-04 13:07:52 +0000807 if (*ru)
808 {
809 (**ru).m_oper = op_none;
810 (**ru).m_specs = NULL;
Michal Krol904ef742004-10-20 14:54:17 +0000811 (**ru).next = NULL;
812 (**ru).m_referenced = 0;
Michal Krola904b492004-03-04 13:07:52 +0000813 }
814}
815
816static void rule_destroy (rule **ru)
817{
818 if (*ru)
819 {
Michal Krol904ef742004-10-20 14:54:17 +0000820 rule_destroy (&(**ru).next);
Michal Krola904b492004-03-04 13:07:52 +0000821 spec_destroy (&(**ru).m_specs);
822 mem_free ((void **) ru);
823 }
824}
825
/* defines rule_append (rule **, rule *): links a rule at the end of a rule list */
GRAMMAR_IMPLEMENT_LIST_APPEND(rule)
Michal Krola904b492004-03-04 13:07:52 +0000827
828/*
829 returns unique grammar id
830*/
Alan Hourihanec63f3cf2004-12-08 14:00:46 +0000831static grammar next_valid_grammar_id (void)
Michal Krola904b492004-03-04 13:07:52 +0000832{
833 static grammar id = 0;
834
835 return ++id;
836}
837
838/*
839 dictionary typedef
840*/
841typedef struct dict_
842{
843 rule *m_rulez;
844 rule *m_syntax;
845 rule *m_string;
846 map_byte *m_regbytes;
847 grammar m_id;
Michal Krol904ef742004-10-20 14:54:17 +0000848 struct dict_ *next;
Michal Krola904b492004-03-04 13:07:52 +0000849} dict;
850
851static void dict_create (dict **di)
852{
Brian Paulbdd15b52004-05-04 15:11:06 +0000853 *di = (dict *) mem_alloc (sizeof (dict));
Michal Krola904b492004-03-04 13:07:52 +0000854 if (*di)
855 {
856 (**di).m_rulez = NULL;
857 (**di).m_syntax = NULL;
858 (**di).m_string = NULL;
859 (**di).m_regbytes = NULL;
860 (**di).m_id = next_valid_grammar_id ();
Michal Krol904ef742004-10-20 14:54:17 +0000861 (**di).next = NULL;
Michal Krola904b492004-03-04 13:07:52 +0000862 }
863}
864
865static void dict_destroy (dict **di)
866{
867 if (*di)
868 {
869 rule_destroy (&(**di).m_rulez);
870 map_byte_destroy (&(**di).m_regbytes);
871 mem_free ((void **) di);
872 }
873}
874
/* defines dict_append (dict **, dict *): links a dictionary at the end of a dict list */
GRAMMAR_IMPLEMENT_LIST_APPEND(dict)
Michal Krola904b492004-03-04 13:07:52 +0000876
877static void dict_find (dict **di, grammar key, dict **data)
878{
879 while (*di)
880 {
881 if ((**di).m_id == key)
882 {
883 *data = *di;
884 return;
885 }
886
Michal Krol904ef742004-10-20 14:54:17 +0000887 di = &(**di).next;
Michal Krola904b492004-03-04 13:07:52 +0000888 }
889
890 *data = NULL;
891}
892
/* presumably the head of the list of all dictionaries (linked through "next"); starts out */
/* empty - its users lie outside this part of the file, TODO confirm */
static dict *g_dicts = NULL;
894
895/*
896 byte array typedef
Michal Krola904b492004-03-04 13:07:52 +0000897*/
898typedef struct barray_
899{
900 byte *data;
901 unsigned int len;
902} barray;
903
904static void barray_create (barray **ba)
905{
Brian Paulbdd15b52004-05-04 15:11:06 +0000906 *ba = (barray *) mem_alloc (sizeof (barray));
Michal Krola904b492004-03-04 13:07:52 +0000907 if (*ba)
908 {
909 (**ba).data = NULL;
910 (**ba).len = 0;
911 }
912}
913
914static void barray_destroy (barray **ba)
915{
916 if (*ba)
917 {
918 mem_free ((void **) &(**ba).data);
919 mem_free ((void **) ba);
920 }
921}
922
923/*
924 reallocates byte array to requested size,
925 returns 0 on success,
926 returns 1 otherwise
927*/
928static int barray_resize (barray **ba, unsigned int nlen)
929{
930 byte *new_pointer;
931
932 if (nlen == 0)
933 {
934 mem_free ((void **) &(**ba).data);
935 (**ba).data = NULL;
936 (**ba).len = 0;
937
938 return 0;
939 }
940 else
941 {
Michal Krol904ef742004-10-20 14:54:17 +0000942 new_pointer = (byte *) mem_realloc ((**ba).data, (**ba).len * sizeof (byte),
943 nlen * sizeof (byte));
Michal Krola904b492004-03-04 13:07:52 +0000944 if (new_pointer)
945 {
946 (**ba).data = new_pointer;
947 (**ba).len = nlen;
948
949 return 0;
950 }
951 }
952
953 return 1;
954}
955
956/*
957 adds byte array pointed by *nb to the end of array pointed by *ba,
958 returns 0 on success,
959 returns 1 otherwise
960*/
961static int barray_append (barray **ba, barray **nb)
962{
963 const unsigned int len = (**ba).len;
964
965 if (barray_resize (ba, (**ba).len + (**nb).len))
966 return 1;
967
968 mem_copy ((**ba).data + len, (**nb).data, (**nb).len);
969
970 return 0;
971}
972
973/*
974 adds emit chain pointed by em to the end of array pointed by *ba,
975 returns 0 on success,
976 returns 1 otherwise
977*/
978static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)
979{
Michal Krol904ef742004-10-20 14:54:17 +0000980 unsigned int count = emit_size (em);
Michal Krola904b492004-03-04 13:07:52 +0000981
982 if (barray_resize (ba, (**ba).len + count))
983 return 1;
984
Michal Krol904ef742004-10-20 14:54:17 +0000985 return emit_push (em, (**ba).data + ((**ba).len - count), c, pos, rbc);
986}
987
988/*
989 byte pool typedef
990*/
991typedef struct bytepool_
992{
993 byte *_F;
994 unsigned int _Siz;
995} bytepool;
996
997static void bytepool_destroy (bytepool **by)
998{
999 if (*by != NULL)
Michal Krola904b492004-03-04 13:07:52 +00001000 {
Michal Krol904ef742004-10-20 14:54:17 +00001001 mem_free ((void **) &(**by)._F);
1002 mem_free ((void **) by);
1003 }
1004}
Michal Krola904b492004-03-04 13:07:52 +00001005
Michal Krol904ef742004-10-20 14:54:17 +00001006static void bytepool_create (bytepool **by, int len)
1007{
1008 *by = (bytepool *) (mem_alloc (sizeof (bytepool)));
1009 if (*by != NULL)
1010 {
1011 (**by)._F = (byte *) (mem_alloc (sizeof (byte) * len));
1012 (**by)._Siz = len;
Michal Krola904b492004-03-04 13:07:52 +00001013
Michal Krol904ef742004-10-20 14:54:17 +00001014 if ((**by)._F == NULL)
1015 bytepool_destroy (by);
1016 }
1017}
Michal Krola904b492004-03-04 13:07:52 +00001018
Ian Romanick9f23a3a2005-07-28 00:11:10 +00001019static int bytepool_reserve (bytepool *by, unsigned int n)
Michal Krol904ef742004-10-20 14:54:17 +00001020{
1021 byte *_P;
1022
Ian Romanick9f23a3a2005-07-28 00:11:10 +00001023 if (n <= by->_Siz)
Michal Krol904ef742004-10-20 14:54:17 +00001024 return 0;
1025
1026 /* byte pool can only grow and at least by doubling its size */
Ian Romanick9f23a3a2005-07-28 00:11:10 +00001027 n = n >= by->_Siz * 2 ? n : by->_Siz * 2;
Michal Krol904ef742004-10-20 14:54:17 +00001028
1029 /* reallocate the memory and adjust pointers to the new memory location */
Ian Romanick9f23a3a2005-07-28 00:11:10 +00001030 _P = (byte *) (mem_realloc (by->_F, sizeof (byte) * by->_Siz, sizeof (byte) * n));
Michal Krol904ef742004-10-20 14:54:17 +00001031 if (_P != NULL)
1032 {
1033 by->_F = _P;
Ian Romanick9f23a3a2005-07-28 00:11:10 +00001034 by->_Siz = n;
Michal Krol904ef742004-10-20 14:54:17 +00001035 return 0;
Michal Krola904b492004-03-04 13:07:52 +00001036 }
1037
Michal Krol904ef742004-10-20 14:54:17 +00001038 return 1;
Michal Krola904b492004-03-04 13:07:52 +00001039}
1040
1041/*
1042 string to string map typedef
1043*/
1044typedef struct map_str_
1045{
1046 byte *key;
1047 byte *data;
1048 struct map_str_ *next;
1049} map_str;
1050
1051static void map_str_create (map_str **ma)
1052{
Brian Paulbdd15b52004-05-04 15:11:06 +00001053 *ma = (map_str *) mem_alloc (sizeof (map_str));
Michal Krola904b492004-03-04 13:07:52 +00001054 if (*ma)
1055 {
1056 (**ma).key = NULL;
1057 (**ma).data = NULL;
1058 (**ma).next = NULL;
1059 }
1060}
1061
1062static void map_str_destroy (map_str **ma)
1063{
1064 if (*ma)
1065 {
1066 map_str_destroy (&(**ma).next);
1067 mem_free ((void **) &(**ma).key);
1068 mem_free ((void **) &(**ma).data);
1069 mem_free ((void **) ma);
1070 }
1071}
1072
Michal Krol904ef742004-10-20 14:54:17 +00001073GRAMMAR_IMPLEMENT_LIST_APPEND(map_str)
Michal Krola904b492004-03-04 13:07:52 +00001074
1075/*
1076 searches the map for specified key,
1077 if the key is matched, *data is filled with data associated with the key,
1078 returns 0 if the key is matched,
1079 returns 1 otherwise
1080*/
1081static int map_str_find (map_str **ma, const byte *key, byte **data)
1082{
1083 while (*ma)
1084 {
1085 if (str_equal ((**ma).key, key))
1086 {
1087 *data = str_duplicate ((**ma).data);
1088 if (*data == NULL)
1089 return 1;
1090
1091 return 0;
1092 }
1093
1094 ma = &(**ma).next;
1095 }
1096
1097 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1098 return 1;
1099}
1100
1101/*
1102 string to rule map typedef
1103*/
1104typedef struct map_rule_
1105{
1106 byte *key;
1107 rule *data;
1108 struct map_rule_ *next;
1109} map_rule;
1110
1111static void map_rule_create (map_rule **ma)
1112{
Brian Paulbdd15b52004-05-04 15:11:06 +00001113 *ma = (map_rule *) mem_alloc (sizeof (map_rule));
Michal Krola904b492004-03-04 13:07:52 +00001114 if (*ma)
1115 {
1116 (**ma).key = NULL;
1117 (**ma).data = NULL;
1118 (**ma).next = NULL;
1119 }
1120}
1121
1122static void map_rule_destroy (map_rule **ma)
1123{
1124 if (*ma)
1125 {
1126 map_rule_destroy (&(**ma).next);
1127 mem_free ((void **) &(**ma).key);
1128 mem_free ((void **) ma);
1129 }
1130}
1131
Michal Krol904ef742004-10-20 14:54:17 +00001132GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule)
Michal Krola904b492004-03-04 13:07:52 +00001133
1134/*
1135 searches the map for specified key,
1136 if the key is matched, *data is filled with data associated with the key,
1137 returns 0 if the is matched,
1138 returns 1 otherwise
1139*/
1140static int map_rule_find (map_rule **ma, const byte *key, rule **data)
1141{
1142 while (*ma)
1143 {
1144 if (str_equal ((**ma).key, key))
1145 {
1146 *data = (**ma).data;
1147
1148 return 0;
1149 }
1150
1151 ma = &(**ma).next;
1152 }
1153
1154 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1155 return 1;
1156}
1157
1158/*
1159 returns 1 if given character is a white space,
1160 returns 0 otherwise
1161*/
1162static int is_space (byte c)
1163{
1164 return c == ' ' || c == '\t' || c == '\n' || c == '\r';
1165}
1166
1167/*
1168 advances text pointer by 1 if character pointed by *text is a space,
1169 returns 1 if a space has been eaten,
1170 returns 0 otherwise
1171*/
1172static int eat_space (const byte **text)
1173{
1174 if (is_space (**text))
1175 {
1176 (*text)++;
1177
1178 return 1;
1179 }
1180
1181 return 0;
1182}
1183
1184/*
Brian Paul788461f2004-03-29 14:53:49 +00001185 returns 1 if text points to C-style comment start string,
Michal Krola904b492004-03-04 13:07:52 +00001186 returns 0 otherwise
1187*/
1188static int is_comment_start (const byte *text)
1189{
1190 return text[0] == '/' && text[1] == '*';
1191}
1192
1193/*
1194 advances text pointer to first character after C-style comment block - if any,
1195 returns 1 if C-style comment block has been encountered and eaten,
1196 returns 0 otherwise
1197*/
1198static int eat_comment (const byte **text)
1199{
1200 if (is_comment_start (*text))
1201 {
1202 /* *text points to comment block - skip two characters to enter comment body */
1203 *text += 2;
1204 /* skip any character except consecutive '*' and '/' */
1205 while (!((*text)[0] == '*' && (*text)[1] == '/'))
1206 (*text)++;
1207 /* skip those two terminating characters */
1208 *text += 2;
1209
1210 return 1;
1211 }
1212
1213 return 0;
1214}
1215
1216/*
1217 advances text pointer to first character that is neither space nor C-style comment block
1218*/
1219static void eat_spaces (const byte **text)
1220{
1221 while (eat_space (text) || eat_comment (text))
1222 ;
1223}
1224
1225/*
1226 resizes string pointed by *ptr to successfully add character c to the end of the string,
1227 returns 0 on success,
1228 returns 1 otherwise
1229*/
1230static int string_grow (byte **ptr, unsigned int *len, byte c)
1231{
1232 /* reallocate the string in 16-byte increments */
1233 if ((*len & 0x0F) == 0x0F || *ptr == NULL)
1234 {
Brian Paulbdd15b52004-05-04 15:11:06 +00001235 byte *tmp = (byte *) mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte),
Michal Krola904b492004-03-04 13:07:52 +00001236 ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte));
1237 if (tmp == NULL)
1238 return 1;
1239
1240 *ptr = tmp;
1241 }
1242
1243 if (c)
1244 {
1245 /* append given character */
1246 (*ptr)[*len] = c;
1247 (*len)++;
1248 }
1249 (*ptr)[*len] = '\0';
1250
1251 return 0;
1252}
1253
1254/*
1255 returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1256 returns 0 otherwise
1257*/
1258static int is_identifier (byte c)
1259{
1260 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
1261}
1262
1263/*
1264 copies characters from *text to *id until non-identifier character is encountered,
1265 assumes that *id points to NULL object - caller is responsible for later freeing the string,
1266 text pointer is advanced to point past the copied identifier,
1267 returns 0 if identifier was successfully copied,
1268 returns 1 otherwise
1269*/
1270static int get_identifier (const byte **text, byte **id)
1271{
1272 const byte *t = *text;
1273 byte *p = NULL;
1274 unsigned int len = 0;
1275
1276 if (string_grow (&p, &len, '\0'))
1277 return 1;
1278
1279 /* loop while next character in buffer is valid for identifiers */
1280 while (is_identifier (*t))
1281 {
1282 if (string_grow (&p, &len, *t++))
1283 {
Brian Paula6c423d2004-08-25 15:59:48 +00001284 mem_free ((void **) (void *) &p);
Michal Krola904b492004-03-04 13:07:52 +00001285 return 1;
1286 }
1287 }
1288
1289 *text = t;
1290 *id = p;
1291
1292 return 0;
1293}
1294
1295/*
Michal Krol904ef742004-10-20 14:54:17 +00001296 converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
1297 advances text pointer past the converted sequence,
1298 returns the converted value
1299*/
1300static unsigned int dec_convert (const byte **text)
1301{
1302 unsigned int value = 0;
1303
1304 while (**text >= '0' && **text <= '9')
1305 {
1306 value = value * 10 + **text - '0';
1307 (*text)++;
1308 }
1309
1310 return value;
1311}
1312
1313/*
Michal Krola904b492004-03-04 13:07:52 +00001314 returns 1 if given character is HEX digit 0-9, A-F or a-f,
1315 returns 0 otherwise
1316*/
1317static int is_hex (byte c)
1318{
1319 return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
1320}
1321
1322/*
1323 returns value of passed character as if it was HEX digit
1324*/
1325static unsigned int hex2dec (byte c)
1326{
1327 if (c >= '0' && c <= '9')
1328 return c - '0';
1329 if (c >= 'A' && c <= 'F')
1330 return c - 'A' + 10;
1331 return c - 'a' + 10;
1332}
1333
1334/*
1335 converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1336 advances text pointer past the converted sequence,
1337 returns the converted value
1338*/
1339static unsigned int hex_convert (const byte **text)
1340{
1341 unsigned int value = 0;
1342
1343 while (is_hex (**text))
1344 {
1345 value = value * 0x10 + hex2dec (**text);
1346 (*text)++;
1347 }
1348
1349 return value;
1350}
1351
1352/*
1353 returns 1 if given character is OCT digit 0-7,
1354 returns 0 otherwise
1355*/
1356static int is_oct (byte c)
1357{
1358 return c >= '0' && c <= '7';
1359}
1360
1361/*
1362 returns value of passed character as if it was OCT digit
1363*/
1364static int oct2dec (byte c)
1365{
1366 return c - '0';
1367}
1368
1369static byte get_escape_sequence (const byte **text)
1370{
1371 int value = 0;
1372
1373 /* skip '\' character */
1374 (*text)++;
1375
1376 switch (*(*text)++)
1377 {
1378 case '\'':
1379 return '\'';
1380 case '"':
1381 return '\"';
1382 case '?':
1383 return '\?';
1384 case '\\':
1385 return '\\';
1386 case 'a':
1387 return '\a';
1388 case 'b':
1389 return '\b';
1390 case 'f':
1391 return '\f';
1392 case 'n':
1393 return '\n';
1394 case 'r':
1395 return '\r';
1396 case 't':
1397 return '\t';
1398 case 'v':
1399 return '\v';
1400 case 'x':
1401 return (byte) hex_convert (text);
1402 }
1403
1404 (*text)--;
1405 if (is_oct (**text))
1406 {
1407 value = oct2dec (*(*text)++);
1408 if (is_oct (**text))
1409 {
1410 value = value * 010 + oct2dec (*(*text)++);
1411 if (is_oct (**text))
1412 value = value * 010 + oct2dec (*(*text)++);
1413 }
1414 }
1415
1416 return (byte) value;
1417}
1418
1419/*
1420 copies characters from *text to *str until " or ' character is encountered,
1421 assumes that *str points to NULL object - caller is responsible for later freeing the string,
1422 assumes that *text points to " or ' character that starts the string,
1423 text pointer is advanced to point past the " or ' character,
1424 returns 0 if string was successfully copied,
1425 returns 1 otherwise
1426*/
1427static int get_string (const byte **text, byte **str)
1428{
1429 const byte *t = *text;
1430 byte *p = NULL;
1431 unsigned int len = 0;
1432 byte term_char;
1433
1434 if (string_grow (&p, &len, '\0'))
1435 return 1;
1436
1437 /* read " or ' character that starts the string */
1438 term_char = *t++;
1439 /* while next character is not the terminating character */
1440 while (*t && *t != term_char)
1441 {
1442 byte c;
1443
1444 if (*t == '\\')
1445 c = get_escape_sequence (&t);
1446 else
1447 c = *t++;
1448
1449 if (string_grow (&p, &len, c))
1450 {
Brian Paula6c423d2004-08-25 15:59:48 +00001451 mem_free ((void **) (void *) &p);
Michal Krola904b492004-03-04 13:07:52 +00001452 return 1;
1453 }
1454 }
1455 /* skip " or ' character that ends the string */
1456 t++;
1457
1458 *text = t;
1459 *str = p;
1460 return 0;
1461}
1462
1463/*
Michal Krol904ef742004-10-20 14:54:17 +00001464 gets emit code, the syntax is:
1465 ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
Michal Krola904b492004-03-04 13:07:52 +00001466 assumes that *text already points to <symbol>,
1467 returns 0 if emit code is successfully read,
1468 returns 1 otherwise
1469*/
1470static int get_emtcode (const byte **text, map_byte **ma)
1471{
1472 const byte *t = *text;
1473 map_byte *m = NULL;
1474
1475 map_byte_create (&m);
1476 if (m == NULL)
1477 return 1;
1478
1479 if (get_identifier (&t, &m->key))
1480 {
1481 map_byte_destroy (&m);
1482 return 1;
1483 }
1484 eat_spaces (&t);
1485
1486 if (*t == '\'')
1487 {
1488 byte *c;
1489
1490 if (get_string (&t, &c))
1491 {
1492 map_byte_destroy (&m);
1493 return 1;
1494 }
1495
1496 m->data = (byte) c[0];
Brian Paula6c423d2004-08-25 15:59:48 +00001497 mem_free ((void **) (void *) &c);
Michal Krola904b492004-03-04 13:07:52 +00001498 }
Michal Krol904ef742004-10-20 14:54:17 +00001499 else if (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))
Michal Krola904b492004-03-04 13:07:52 +00001500 {
1501 /* skip HEX "0x" or "0X" prefix */
1502 t += 2;
1503 m->data = (byte) hex_convert (&t);
1504 }
Michal Krol904ef742004-10-20 14:54:17 +00001505 else
1506 {
1507 m->data = (byte) dec_convert (&t);
1508 }
Michal Krola904b492004-03-04 13:07:52 +00001509
1510 eat_spaces (&t);
1511
1512 *text = t;
1513 *ma = m;
1514 return 0;
1515}
1516
1517/*
Michal Krol904ef742004-10-20 14:54:17 +00001518 gets regbyte declaration, the syntax is:
1519 ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
Michal Krola904b492004-03-04 13:07:52 +00001520 assumes that *text already points to <symbol>,
1521 returns 0 if regbyte is successfully read,
1522 returns 1 otherwise
1523*/
1524static int get_regbyte (const byte **text, map_byte **ma)
1525{
Michal Krol904ef742004-10-20 14:54:17 +00001526 /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
Michal Krola904b492004-03-04 13:07:52 +00001527 return get_emtcode (text, ma);
1528}
1529
1530/*
1531 returns 0 on success,
1532 returns 1 otherwise
1533*/
1534static int get_errtext (const byte **text, map_str **ma)
1535{
1536 const byte *t = *text;
1537 map_str *m = NULL;
1538
1539 map_str_create (&m);
1540 if (m == NULL)
1541 return 1;
1542
1543 if (get_identifier (&t, &m->key))
1544 {
1545 map_str_destroy (&m);
1546 return 1;
1547 }
1548 eat_spaces (&t);
1549
1550 if (get_string (&t, &m->data))
1551 {
1552 map_str_destroy (&m);
1553 return 1;
1554 }
1555 eat_spaces (&t);
1556
1557 *text = t;
1558 *ma = m;
1559 return 0;
1560}
1561
1562/*
1563 returns 0 on success,
1564 returns 1 otherwise,
1565*/
1566static int get_error (const byte **text, error **er, map_str *maps)
1567{
1568 const byte *t = *text;
1569 byte *temp = NULL;
1570
1571 if (*t != '.')
1572 return 0;
1573
1574 t++;
1575 if (get_identifier (&t, &temp))
1576 return 1;
1577 eat_spaces (&t);
1578
1579 if (!str_equal ((byte *) "error", temp))
1580 {
Brian Paula6c423d2004-08-25 15:59:48 +00001581 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001582 return 0;
1583 }
1584
Brian Paula6c423d2004-08-25 15:59:48 +00001585 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001586
1587 error_create (er);
1588 if (*er == NULL)
1589 return 1;
1590
1591 if (*t == '\"')
1592 {
1593 if (get_string (&t, &(**er).m_text))
1594 {
1595 error_destroy (er);
1596 return 1;
1597 }
1598 eat_spaces (&t);
1599 }
1600 else
1601 {
1602 if (get_identifier (&t, &temp))
1603 {
1604 error_destroy (er);
1605 return 1;
1606 }
1607 eat_spaces (&t);
1608
1609 if (map_str_find (&maps, temp, &(**er).m_text))
1610 {
Brian Paula6c423d2004-08-25 15:59:48 +00001611 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001612 error_destroy (er);
1613 return 1;
1614 }
1615
Brian Paula6c423d2004-08-25 15:59:48 +00001616 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001617 }
1618
1619 /* try to extract "token" from "...$token$..." */
1620 {
1621 byte *processed = NULL;
Michal Krola79d4e72006-10-19 08:07:00 +00001622 unsigned int len = 0;
1623 int i = 0;
Michal Krola904b492004-03-04 13:07:52 +00001624
1625 if (string_grow (&processed, &len, '\0'))
1626 {
1627 error_destroy (er);
1628 return 1;
1629 }
1630
1631 while (i < str_length ((**er).m_text))
1632 {
1633 /* check if the dollar sign is repeated - if so skip it */
1634 if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$')
1635 {
1636 if (string_grow (&processed, &len, '$'))
1637 {
Brian Paula6c423d2004-08-25 15:59:48 +00001638 mem_free ((void **) (void *) &processed);
Michal Krola904b492004-03-04 13:07:52 +00001639 error_destroy (er);
1640 return 1;
1641 }
1642
1643 i += 2;
1644 }
1645 else if ((**er).m_text[i] != '$')
1646 {
1647 if (string_grow (&processed, &len, (**er).m_text[i]))
1648 {
Brian Paula6c423d2004-08-25 15:59:48 +00001649 mem_free ((void **) (void *) &processed);
Michal Krola904b492004-03-04 13:07:52 +00001650 error_destroy (er);
1651 return 1;
1652 }
1653
1654 i++;
1655 }
1656 else
1657 {
1658 if (string_grow (&processed, &len, '$'))
1659 {
Brian Paula6c423d2004-08-25 15:59:48 +00001660 mem_free ((void **) (void *) &processed);
Michal Krola904b492004-03-04 13:07:52 +00001661 error_destroy (er);
1662 return 1;
1663 }
1664
1665 {
1666 /* length of token being extracted */
1667 unsigned int tlen = 0;
1668
1669 if (string_grow (&(**er).m_token_name, &tlen, '\0'))
1670 {
Brian Paula6c423d2004-08-25 15:59:48 +00001671 mem_free ((void **) (void *) &processed);
Michal Krola904b492004-03-04 13:07:52 +00001672 error_destroy (er);
1673 return 1;
1674 }
1675
1676 /* skip the dollar sign */
1677 i++;
1678
1679 while ((**er).m_text[i] != '$')
1680 {
1681 if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i]))
1682 {
Brian Paula6c423d2004-08-25 15:59:48 +00001683 mem_free ((void **) (void *) &processed);
Michal Krola904b492004-03-04 13:07:52 +00001684 error_destroy (er);
1685 return 1;
1686 }
1687
1688 i++;
1689 }
1690
1691 /* skip the dollar sign */
1692 i++;
1693 }
1694 }
1695 }
1696
1697 mem_free ((void **) &(**er).m_text);
1698 (**er).m_text = processed;
1699 }
1700
1701 *text = t;
1702 return 0;
1703}
1704
1705/*
1706 returns 0 on success,
1707 returns 1 otherwise,
1708*/
1709static int get_emits (const byte **text, emit **em, map_byte *mapb)
1710{
1711 const byte *t = *text;
1712 byte *temp = NULL;
1713 emit *e = NULL;
1714 emit_dest dest;
1715
1716 if (*t != '.')
1717 return 0;
1718
1719 t++;
1720 if (get_identifier (&t, &temp))
1721 return 1;
1722 eat_spaces (&t);
1723
1724 /* .emit */
1725 if (str_equal ((byte *) "emit", temp))
1726 dest = ed_output;
1727 /* .load */
1728 else if (str_equal ((byte *) "load", temp))
1729 dest = ed_regbyte;
1730 else
1731 {
Brian Paula6c423d2004-08-25 15:59:48 +00001732 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001733 return 0;
1734 }
1735
Brian Paula6c423d2004-08-25 15:59:48 +00001736 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001737
1738 emit_create (&e);
1739 if (e == NULL)
1740 return 1;
1741
1742 e->m_emit_dest = dest;
1743
1744 if (dest == ed_regbyte)
1745 {
1746 if (get_identifier (&t, &e->m_regname))
1747 {
1748 emit_destroy (&e);
1749 return 1;
1750 }
1751 eat_spaces (&t);
1752 }
1753
1754 /* 0xNN */
Michal Krol904ef742004-10-20 14:54:17 +00001755 if (*t == '0' && (t[1] == 'x' || t[1] == 'X'))
Michal Krola904b492004-03-04 13:07:52 +00001756 {
1757 t += 2;
1758 e->m_byte = (byte) hex_convert (&t);
1759
1760 e->m_emit_type = et_byte;
1761 }
Michal Krol904ef742004-10-20 14:54:17 +00001762 /* NNN */
1763 else if (*t >= '0' && *t <= '9')
1764 {
1765 e->m_byte = (byte) dec_convert (&t);
1766
1767 e->m_emit_type = et_byte;
1768 }
Michal Krola904b492004-03-04 13:07:52 +00001769 /* * */
1770 else if (*t == '*')
1771 {
1772 t++;
1773
1774 e->m_emit_type = et_stream;
1775 }
1776 /* $ */
1777 else if (*t == '$')
1778 {
1779 t++;
1780
1781 e->m_emit_type = et_position;
1782 }
1783 /* 'c' */
1784 else if (*t == '\'')
1785 {
1786 if (get_string (&t, &temp))
1787 {
1788 emit_destroy (&e);
1789 return 1;
1790 }
1791 e->m_byte = (byte) temp[0];
1792
Brian Paula6c423d2004-08-25 15:59:48 +00001793 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001794
1795 e->m_emit_type = et_byte;
1796 }
1797 else
1798 {
1799 if (get_identifier (&t, &temp))
1800 {
1801 emit_destroy (&e);
1802 return 1;
1803 }
1804
1805 if (map_byte_find (&mapb, temp, &e->m_byte))
1806 {
Brian Paula6c423d2004-08-25 15:59:48 +00001807 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001808 emit_destroy (&e);
1809 return 1;
1810 }
1811
Brian Paula6c423d2004-08-25 15:59:48 +00001812 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001813
1814 e->m_emit_type = et_byte;
1815 }
1816
1817 eat_spaces (&t);
1818
1819 if (get_emits (&t, &e->m_next, mapb))
1820 {
1821 emit_destroy (&e);
1822 return 1;
1823 }
1824
1825 *text = t;
1826 *em = e;
1827 return 0;
1828}
1829
1830/*
1831 returns 0 on success,
1832 returns 1 otherwise,
1833*/
1834static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)
1835{
1836 const byte *t = *text;
1837 spec *s = NULL;
1838
1839 spec_create (&s);
1840 if (s == NULL)
1841 return 1;
1842
1843 /* first - read optional .if statement */
1844 if (*t == '.')
1845 {
1846 const byte *u = t;
1847 byte *keyword = NULL;
1848
1849 /* skip the dot */
1850 u++;
1851
1852 if (get_identifier (&u, &keyword))
1853 {
1854 spec_destroy (&s);
1855 return 1;
1856 }
1857
1858 /* .if */
1859 if (str_equal ((byte *) "if", keyword))
1860 {
1861 cond_create (&s->m_cond);
1862 if (s->m_cond == NULL)
1863 {
1864 spec_destroy (&s);
1865 return 1;
1866 }
1867
1868 /* skip the left paren */
1869 eat_spaces (&u);
1870 u++;
1871
1872 /* get the left operand */
1873 eat_spaces (&u);
1874 if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
1875 {
1876 spec_destroy (&s);
1877 return 1;
1878 }
1879 s->m_cond->m_operands[0].m_type = cot_regbyte;
1880
1881 /* get the operator (!= or ==) */
1882 eat_spaces (&u);
1883 if (*u == '!')
1884 s->m_cond->m_type = ct_not_equal;
1885 else
1886 s->m_cond->m_type = ct_equal;
1887 u += 2;
Michal Krola904b492004-03-04 13:07:52 +00001888 eat_spaces (&u);
Michal Krola904b492004-03-04 13:07:52 +00001889
Michal Krol904ef742004-10-20 14:54:17 +00001890 if (u[0] == '0' && (u[1] == 'x' || u[1] == 'X'))
1891 {
1892 /* skip the 0x prefix */
1893 u += 2;
1894
1895 /* get the right operand */
1896 s->m_cond->m_operands[1].m_byte = hex_convert (&u);
1897 s->m_cond->m_operands[1].m_type = cot_byte;
1898 }
1899 else /*if (*u >= '0' && *u <= '9')*/
1900 {
1901 /* get the right operand */
1902 s->m_cond->m_operands[1].m_byte = dec_convert (&u);
1903 s->m_cond->m_operands[1].m_type = cot_byte;
1904 }
Michal Krola904b492004-03-04 13:07:52 +00001905
1906 /* skip the right paren */
1907 eat_spaces (&u);
1908 u++;
1909
1910 eat_spaces (&u);
1911
1912 t = u;
1913 }
1914
Brian Paula6c423d2004-08-25 15:59:48 +00001915 mem_free ((void **) (void *) &keyword);
Michal Krola904b492004-03-04 13:07:52 +00001916 }
1917
1918 if (*t == '\'')
1919 {
1920 byte *temp = NULL;
1921
1922 if (get_string (&t, &temp))
1923 {
1924 spec_destroy (&s);
1925 return 1;
1926 }
1927 eat_spaces (&t);
1928
1929 if (*t == '-')
1930 {
1931 byte *temp2 = NULL;
1932
1933 /* skip the '-' character */
1934 t++;
1935 eat_spaces (&t);
1936
1937 if (get_string (&t, &temp2))
1938 {
Brian Paula6c423d2004-08-25 15:59:48 +00001939 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001940 spec_destroy (&s);
1941 return 1;
1942 }
1943 eat_spaces (&t);
1944
1945 s->m_spec_type = st_byte_range;
1946 s->m_byte[0] = *temp;
1947 s->m_byte[1] = *temp2;
1948
Brian Paula6c423d2004-08-25 15:59:48 +00001949 mem_free ((void **) (void *) &temp2);
Michal Krola904b492004-03-04 13:07:52 +00001950 }
1951 else
1952 {
1953 s->m_spec_type = st_byte;
1954 *s->m_byte = *temp;
1955 }
1956
Brian Paula6c423d2004-08-25 15:59:48 +00001957 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001958 }
1959 else if (*t == '"')
1960 {
1961 if (get_string (&t, &s->m_string))
1962 {
1963 spec_destroy (&s);
1964 return 1;
1965 }
1966 eat_spaces (&t);
1967
1968 s->m_spec_type = st_string;
1969 }
1970 else if (*t == '.')
1971 {
1972 byte *keyword = NULL;
1973
1974 /* skip the dot */
1975 t++;
1976
1977 if (get_identifier (&t, &keyword))
1978 {
1979 spec_destroy (&s);
1980 return 1;
1981 }
1982 eat_spaces (&t);
1983
1984 /* .true */
1985 if (str_equal ((byte *) "true", keyword))
1986 {
1987 s->m_spec_type = st_true;
1988 }
1989 /* .false */
1990 else if (str_equal ((byte *) "false", keyword))
1991 {
1992 s->m_spec_type = st_false;
1993 }
1994 /* .debug */
1995 else if (str_equal ((byte *) "debug", keyword))
1996 {
1997 s->m_spec_type = st_debug;
1998 }
1999 /* .loop */
2000 else if (str_equal ((byte *) "loop", keyword))
2001 {
2002 if (get_identifier (&t, &s->m_string))
2003 {
Brian Paula6c423d2004-08-25 15:59:48 +00002004 mem_free ((void **) (void *) &keyword);
Michal Krola904b492004-03-04 13:07:52 +00002005 spec_destroy (&s);
2006 return 1;
2007 }
2008 eat_spaces (&t);
2009
2010 s->m_spec_type = st_identifier_loop;
2011 }
Brian Paula6c423d2004-08-25 15:59:48 +00002012 mem_free ((void **) (void *) &keyword);
Michal Krola904b492004-03-04 13:07:52 +00002013 }
2014 else
2015 {
2016 if (get_identifier (&t, &s->m_string))
2017 {
2018 spec_destroy (&s);
2019 return 1;
2020 }
2021 eat_spaces (&t);
2022
2023 s->m_spec_type = st_identifier;
2024 }
2025
2026 if (get_error (&t, &s->m_errtext, maps))
2027 {
2028 spec_destroy (&s);
2029 return 1;
2030 }
2031
2032 if (get_emits (&t, &s->m_emits, mapb))
2033 {
2034 spec_destroy (&s);
2035 return 1;
2036 }
2037
2038 *text = t;
2039 *sp = s;
2040 return 0;
2041}
2042
2043/*
2044 returns 0 on success,
2045 returns 1 otherwise,
2046*/
2047static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)
2048{
2049 const byte *t = *text;
2050 rule *r = NULL;
2051
2052 rule_create (&r);
2053 if (r == NULL)
2054 return 1;
2055
2056 if (get_spec (&t, &r->m_specs, maps, mapb))
2057 {
2058 rule_destroy (&r);
2059 return 1;
2060 }
2061
2062 while (*t != ';')
2063 {
2064 byte *op = NULL;
2065 spec *sp = NULL;
2066
2067 /* skip the dot that precedes "and" or "or" */
2068 t++;
2069
2070 /* read "and" or "or" keyword */
2071 if (get_identifier (&t, &op))
2072 {
2073 rule_destroy (&r);
2074 return 1;
2075 }
2076 eat_spaces (&t);
2077
2078 if (r->m_oper == op_none)
2079 {
2080 /* .and */
2081 if (str_equal ((byte *) "and", op))
2082 r->m_oper = op_and;
2083 /* .or */
2084 else
2085 r->m_oper = op_or;
2086 }
2087
Brian Paula6c423d2004-08-25 15:59:48 +00002088 mem_free ((void **) (void *) &op);
Michal Krola904b492004-03-04 13:07:52 +00002089
2090 if (get_spec (&t, &sp, maps, mapb))
2091 {
2092 rule_destroy (&r);
2093 return 1;
2094 }
2095
Michal Krol904ef742004-10-20 14:54:17 +00002096 spec_append (&r->m_specs, sp);
Michal Krola904b492004-03-04 13:07:52 +00002097 }
2098
2099 /* skip the semicolon */
2100 t++;
2101 eat_spaces (&t);
2102
2103 *text = t;
2104 *ru = r;
2105 return 0;
2106}
2107
2108/*
2109 returns 0 on success,
2110 returns 1 otherwise,
2111*/
2112static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)
2113{
2114 if (map_rule_find (&mapr, symbol, ru))
2115 return 1;
2116
Michal Krol904ef742004-10-20 14:54:17 +00002117 (**ru).m_referenced = 1;
Michal Krola904b492004-03-04 13:07:52 +00002118
2119 return 0;
2120}
2121
2122/*
2123 returns 0 on success,
2124 returns 1 otherwise,
2125*/
2126static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,
2127 byte **string_symbol, map_byte *regbytes)
2128{
2129 rule *rulez = di->m_rulez;
2130
2131 /* update dependecies for the root and lexer symbols */
2132 if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||
2133 (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))
2134 return 1;
2135
2136 mem_free ((void **) syntax_symbol);
2137 mem_free ((void **) string_symbol);
2138
2139 /* update dependecies for the rest of the rules */
2140 while (rulez)
2141 {
2142 spec *sp = rulez->m_specs;
2143
2144 /* iterate through all the specifiers */
2145 while (sp)
2146 {
2147 /* update dependency for identifier */
2148 if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)
2149 {
2150 if (update_dependency (mapr, sp->m_string, &sp->m_rule))
2151 return 1;
2152
2153 mem_free ((void **) &sp->m_string);
2154 }
2155
2156 /* some errtexts reference to a rule */
2157 if (sp->m_errtext && sp->m_errtext->m_token_name)
2158 {
2159 if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
2160 return 1;
2161
2162 mem_free ((void **) &sp->m_errtext->m_token_name);
2163 }
2164
2165 /* update dependency for condition */
2166 if (sp->m_cond)
2167 {
2168 int i;
2169 for (i = 0; i < 2; i++)
2170 if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
2171 {
2172 sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
2173 sp->m_cond->m_operands[i].m_regname);
2174
2175 if (sp->m_cond->m_operands[i].m_regbyte == NULL)
2176 return 1;
2177
2178 mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
2179 }
2180 }
2181
2182 /* update dependency for all .load instructions */
2183 if (sp->m_emits)
2184 {
2185 emit *em = sp->m_emits;
2186 while (em != NULL)
2187 {
2188 if (em->m_emit_dest == ed_regbyte)
2189 {
2190 em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
2191
2192 if (em->m_regbyte == NULL)
2193 return 1;
2194
2195 mem_free ((void **) &em->m_regname);
2196 }
2197
2198 em = em->m_next;
2199 }
2200 }
2201
Michal Krol904ef742004-10-20 14:54:17 +00002202 sp = sp->next;
Michal Krola904b492004-03-04 13:07:52 +00002203 }
2204
Michal Krol904ef742004-10-20 14:54:17 +00002205 rulez = rulez->next;
Michal Krola904b492004-03-04 13:07:52 +00002206 }
2207
Michal Krol904ef742004-10-20 14:54:17 +00002208 /* check for unreferenced symbols */
2209 rulez = di->m_rulez;
2210 while (rulez != NULL)
Michal Krola904b492004-03-04 13:07:52 +00002211 {
Michal Krol904ef742004-10-20 14:54:17 +00002212 if (!rulez->m_referenced)
Michal Krola904b492004-03-04 13:07:52 +00002213 {
Michal Krol904ef742004-10-20 14:54:17 +00002214 map_rule *ma = mapr;
Michal Krola904b492004-03-04 13:07:52 +00002215 while (ma)
2216 {
Michal Krol904ef742004-10-20 14:54:17 +00002217 if (ma->data == rulez)
Michal Krola904b492004-03-04 13:07:52 +00002218 {
Michal Krol904ef742004-10-20 14:54:17 +00002219 set_last_error (UNREFERENCED_IDENTIFIER, str_duplicate (ma->key), -1);
2220 return 1;
Michal Krola904b492004-03-04 13:07:52 +00002221 }
2222 ma = ma->next;
2223 }
2224 }
Michal Krol904ef742004-10-20 14:54:17 +00002225 rulez = rulez->next;
Michal Krola904b492004-03-04 13:07:52 +00002226 }
Michal Krol904ef742004-10-20 14:54:17 +00002227
Michal Krola904b492004-03-04 13:07:52 +00002228 return 0;
2229}
2230
2231static int satisfies_condition (cond *co, regbyte_ctx *ctx)
2232{
2233 byte values[2];
2234 int i;
2235
2236 if (co == NULL)
2237 return 1;
2238
2239 for (i = 0; i < 2; i++)
2240 switch (co->m_operands[i].m_type)
2241 {
2242 case cot_byte:
2243 values[i] = co->m_operands[i].m_byte;
2244 break;
2245 case cot_regbyte:
2246 values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
2247 break;
2248 }
2249
2250 switch (co->m_type)
2251 {
2252 case ct_equal:
2253 return values[0] == values[1];
2254 case ct_not_equal:
2255 return values[0] != values[1];
2256 }
2257
2258 return 0;
2259}
2260
2261static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)
2262{
2263 while (top != limit)
2264 {
2265 regbyte_ctx *rbc = top->m_prev;
2266 regbyte_ctx_destroy (&top);
2267 top = rbc;
2268 }
2269}
2270
/*
 * Result codes returned by match() and fast_match().
 */
typedef enum match_result_
{
    /* the examined string does not match */
    mr_not_matched,
    /* the examined string matches */
    mr_matched,
    /* same as mr_not_matched, but an error has been raised as well */
    mr_error_raised,
    /* used by identifier loops only */
    mr_dont_emit,
    /* an internal error has occurred, such as out of memory */
    mr_internal_error
} match_result;
2279
2280/*
Michal Krola79d4e72006-10-19 08:07:00 +00002281 * This function does the main job. It parses the text and generates output data.
2282 */
2283static match_result
2284match (dict *di, const byte *text, int *index, rule *ru, barray **ba, int filtering_string,
2285 regbyte_ctx **rbc)
Michal Krola904b492004-03-04 13:07:52 +00002286{
Michal Krola79d4e72006-10-19 08:07:00 +00002287 int ind = *index;
Michal Krola904b492004-03-04 13:07:52 +00002288 match_result status = mr_not_matched;
2289 spec *sp = ru->m_specs;
2290 regbyte_ctx *ctx = *rbc;
2291
2292 /* for every specifier in the rule */
2293 while (sp)
2294 {
Michal Krola79d4e72006-10-19 08:07:00 +00002295 int i, len, save_ind = ind;
Michal Krola904b492004-03-04 13:07:52 +00002296 barray *array = NULL;
2297
2298 if (satisfies_condition (sp->m_cond, ctx))
2299 {
2300 switch (sp->m_spec_type)
2301 {
2302 case st_identifier:
2303 barray_create (&array);
2304 if (array == NULL)
2305 {
2306 free_regbyte_ctx_stack (ctx, *rbc);
2307 return mr_internal_error;
2308 }
2309
2310 status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
Michal Krol904ef742004-10-20 14:54:17 +00002311
Michal Krola904b492004-03-04 13:07:52 +00002312 if (status == mr_internal_error)
2313 {
2314 free_regbyte_ctx_stack (ctx, *rbc);
2315 barray_destroy (&array);
2316 return mr_internal_error;
2317 }
2318 break;
2319 case st_string:
2320 len = str_length (sp->m_string);
2321
2322 /* prefilter the stream */
2323 if (!filtering_string && di->m_string)
2324 {
2325 barray *ba;
Michal Krola79d4e72006-10-19 08:07:00 +00002326 int filter_index = 0;
Michal Krola904b492004-03-04 13:07:52 +00002327 match_result result;
2328 regbyte_ctx *null_ctx = NULL;
2329
2330 barray_create (&ba);
2331 if (ba == NULL)
2332 {
2333 free_regbyte_ctx_stack (ctx, *rbc);
2334 return mr_internal_error;
2335 }
2336
2337 result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
2338
2339 if (result == mr_internal_error)
2340 {
2341 free_regbyte_ctx_stack (ctx, *rbc);
2342 barray_destroy (&ba);
2343 return mr_internal_error;
2344 }
2345
2346 if (result != mr_matched)
2347 {
2348 barray_destroy (&ba);
2349 status = mr_not_matched;
2350 break;
2351 }
2352
2353 barray_destroy (&ba);
2354
2355 if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2356 {
2357 status = mr_not_matched;
2358 break;
2359 }
2360
2361 status = mr_matched;
2362 ind += len;
2363 }
2364 else
2365 {
2366 status = mr_matched;
2367 for (i = 0; status == mr_matched && i < len; i++)
2368 if (text[ind + i] != sp->m_string[i])
2369 status = mr_not_matched;
Michal Krol904ef742004-10-20 14:54:17 +00002370
Michal Krola904b492004-03-04 13:07:52 +00002371 if (status == mr_matched)
2372 ind += len;
2373 }
2374 break;
2375 case st_byte:
2376 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2377 if (status == mr_matched)
2378 ind++;
2379 break;
2380 case st_byte_range:
2381 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2382 mr_matched : mr_not_matched;
2383 if (status == mr_matched)
2384 ind++;
2385 break;
2386 case st_true:
2387 status = mr_matched;
2388 break;
2389 case st_false:
2390 status = mr_not_matched;
2391 break;
2392 case st_debug:
2393 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2394 break;
2395 case st_identifier_loop:
2396 barray_create (&array);
2397 if (array == NULL)
2398 {
2399 free_regbyte_ctx_stack (ctx, *rbc);
2400 return mr_internal_error;
2401 }
2402
2403 status = mr_dont_emit;
2404 for (;;)
2405 {
2406 match_result result;
2407
2408 save_ind = ind;
2409 result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2410
2411 if (result == mr_error_raised)
2412 {
2413 status = result;
2414 break;
2415 }
2416 else if (result == mr_matched)
2417 {
2418 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||
2419 barray_append (ba, &array))
2420 {
2421 free_regbyte_ctx_stack (ctx, *rbc);
2422 barray_destroy (&array);
2423 return mr_internal_error;
2424 }
2425 barray_destroy (&array);
2426 barray_create (&array);
2427 if (array == NULL)
2428 {
2429 free_regbyte_ctx_stack (ctx, *rbc);
2430 return mr_internal_error;
2431 }
2432 }
2433 else if (result == mr_internal_error)
2434 {
2435 free_regbyte_ctx_stack (ctx, *rbc);
2436 barray_destroy (&array);
2437 return mr_internal_error;
2438 }
2439 else
2440 break;
2441 }
2442 break;
2443 }
2444 }
2445 else
2446 {
2447 status = mr_not_matched;
2448 }
2449
2450 if (status == mr_error_raised)
2451 {
2452 free_regbyte_ctx_stack (ctx, *rbc);
2453 barray_destroy (&array);
2454
2455 return mr_error_raised;
2456 }
2457
2458 if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2459 {
2460 free_regbyte_ctx_stack (ctx, *rbc);
2461 barray_destroy (&array);
2462
2463 if (sp->m_errtext)
2464 {
2465 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2466 ind), ind);
2467
2468 return mr_error_raised;
2469 }
2470
2471 return mr_not_matched;
2472 }
2473
2474 if (status == mr_matched)
2475 {
2476 if (sp->m_emits)
2477 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
2478 {
2479 free_regbyte_ctx_stack (ctx, *rbc);
2480 barray_destroy (&array);
2481 return mr_internal_error;
2482 }
2483
2484 if (array)
2485 if (barray_append (ba, &array))
2486 {
2487 free_regbyte_ctx_stack (ctx, *rbc);
2488 barray_destroy (&array);
2489 return mr_internal_error;
2490 }
2491 }
2492
2493 barray_destroy (&array);
2494
2495 /* if the rule operator is a logical or, we pick up the first matching specifier */
2496 if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2497 {
2498 *index = ind;
2499 *rbc = ctx;
2500 return mr_matched;
2501 }
2502
Michal Krol904ef742004-10-20 14:54:17 +00002503 sp = sp->next;
Michal Krola904b492004-03-04 13:07:52 +00002504 }
2505
2506 /* everything went fine - all specifiers match up */
2507 if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2508 {
2509 *index = ind;
2510 *rbc = ctx;
2511 return mr_matched;
2512 }
2513
2514 free_regbyte_ctx_stack (ctx, *rbc);
2515 return mr_not_matched;
2516}
2517
Michal Krola79d4e72006-10-19 08:07:00 +00002518static match_result
2519fast_match (dict *di, const byte *text, int *index, rule *ru, int *_PP, bytepool *_BP,
2520 int filtering_string, regbyte_ctx **rbc)
Michal Krol904ef742004-10-20 14:54:17 +00002521{
Michal Krola79d4e72006-10-19 08:07:00 +00002522 int ind = *index;
Michal Krol904ef742004-10-20 14:54:17 +00002523 int _P = filtering_string ? 0 : *_PP;
2524 int _P2;
2525 match_result status = mr_not_matched;
2526 spec *sp = ru->m_specs;
2527 regbyte_ctx *ctx = *rbc;
2528
2529 /* for every specifier in the rule */
2530 while (sp)
2531 {
Michal Krola79d4e72006-10-19 08:07:00 +00002532 int i, len, save_ind = ind;
Michal Krol904ef742004-10-20 14:54:17 +00002533
2534 _P2 = _P + (sp->m_emits ? emit_size (sp->m_emits) : 0);
2535 if (bytepool_reserve (_BP, _P2))
2536 {
2537 free_regbyte_ctx_stack (ctx, *rbc);
2538 return mr_internal_error;
2539 }
2540
2541 if (satisfies_condition (sp->m_cond, ctx))
2542 {
2543 switch (sp->m_spec_type)
2544 {
2545 case st_identifier:
2546 status = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2547
2548 if (status == mr_internal_error)
2549 {
2550 free_regbyte_ctx_stack (ctx, *rbc);
2551 return mr_internal_error;
2552 }
2553 break;
2554 case st_string:
2555 len = str_length (sp->m_string);
2556
2557 /* prefilter the stream */
2558 if (!filtering_string && di->m_string)
2559 {
Michal Krola79d4e72006-10-19 08:07:00 +00002560 int filter_index = 0;
Michal Krol904ef742004-10-20 14:54:17 +00002561 match_result result;
2562 regbyte_ctx *null_ctx = NULL;
2563
2564 result = fast_match (di, text + ind, &filter_index, di->m_string, NULL, _BP, 1, &null_ctx);
2565
2566 if (result == mr_internal_error)
2567 {
2568 free_regbyte_ctx_stack (ctx, *rbc);
2569 return mr_internal_error;
2570 }
2571
2572 if (result != mr_matched)
2573 {
2574 status = mr_not_matched;
2575 break;
2576 }
2577
2578 if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2579 {
2580 status = mr_not_matched;
2581 break;
2582 }
2583
2584 status = mr_matched;
2585 ind += len;
2586 }
2587 else
2588 {
2589 status = mr_matched;
2590 for (i = 0; status == mr_matched && i < len; i++)
2591 if (text[ind + i] != sp->m_string[i])
2592 status = mr_not_matched;
2593
2594 if (status == mr_matched)
2595 ind += len;
2596 }
2597 break;
2598 case st_byte:
2599 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2600 if (status == mr_matched)
2601 ind++;
2602 break;
2603 case st_byte_range:
2604 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2605 mr_matched : mr_not_matched;
2606 if (status == mr_matched)
2607 ind++;
2608 break;
2609 case st_true:
2610 status = mr_matched;
2611 break;
2612 case st_false:
2613 status = mr_not_matched;
2614 break;
2615 case st_debug:
2616 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2617 break;
2618 case st_identifier_loop:
2619 status = mr_dont_emit;
2620 for (;;)
2621 {
2622 match_result result;
2623
2624 save_ind = ind;
2625 result = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2626
2627 if (result == mr_error_raised)
2628 {
2629 status = result;
2630 break;
2631 }
2632 else if (result == mr_matched)
2633 {
2634 if (!filtering_string)
2635 {
2636 if (sp->m_emits != NULL)
2637 {
2638 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2639 {
2640 free_regbyte_ctx_stack (ctx, *rbc);
2641 return mr_internal_error;
2642 }
2643 }
2644
2645 _P = _P2;
2646 _P2 += sp->m_emits ? emit_size (sp->m_emits) : 0;
2647 if (bytepool_reserve (_BP, _P2))
2648 {
2649 free_regbyte_ctx_stack (ctx, *rbc);
2650 return mr_internal_error;
2651 }
2652 }
2653 }
2654 else if (result == mr_internal_error)
2655 {
2656 free_regbyte_ctx_stack (ctx, *rbc);
2657 return mr_internal_error;
2658 }
2659 else
2660 break;
2661 }
2662 break;
2663 }
2664 }
2665 else
2666 {
2667 status = mr_not_matched;
2668 }
2669
2670 if (status == mr_error_raised)
2671 {
2672 free_regbyte_ctx_stack (ctx, *rbc);
2673
2674 return mr_error_raised;
2675 }
2676
2677 if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2678 {
2679 free_regbyte_ctx_stack (ctx, *rbc);
2680
2681 if (sp->m_errtext)
2682 {
2683 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2684 ind), ind);
2685
2686 return mr_error_raised;
2687 }
2688
2689 return mr_not_matched;
2690 }
2691
2692 if (status == mr_matched)
2693 {
2694 if (sp->m_emits != NULL)
2695 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2696 {
2697 free_regbyte_ctx_stack (ctx, *rbc);
2698 return mr_internal_error;
2699 }
2700
2701 _P = _P2;
2702 }
2703
2704 /* if the rule operator is a logical or, we pick up the first matching specifier */
2705 if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2706 {
2707 *index = ind;
2708 *rbc = ctx;
2709 if (!filtering_string)
2710 *_PP = _P;
2711 return mr_matched;
2712 }
2713
2714 sp = sp->next;
2715 }
2716
2717 /* everything went fine - all specifiers match up */
2718 if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2719 {
2720 *index = ind;
2721 *rbc = ctx;
2722 if (!filtering_string)
2723 *_PP = _P;
2724 return mr_matched;
2725 }
2726
2727 free_regbyte_ctx_stack (ctx, *rbc);
2728 return mr_not_matched;
2729}
2730
Michal Krola79d4e72006-10-19 08:07:00 +00002731static byte *
2732error_get_token (error *er, dict *di, const byte *text, int ind)
Michal Krola904b492004-03-04 13:07:52 +00002733{
2734 byte *str = NULL;
2735
2736 if (er->m_token)
2737 {
2738 barray *ba;
Michal Krola79d4e72006-10-19 08:07:00 +00002739 int filter_index = 0;
Michal Krola904b492004-03-04 13:07:52 +00002740 regbyte_ctx *ctx = NULL;
2741
2742 barray_create (&ba);
2743 if (ba != NULL)
2744 {
2745 if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
2746 filter_index)
2747 {
Brian Paulbdd15b52004-05-04 15:11:06 +00002748 str = (byte *) mem_alloc (filter_index + 1);
Michal Krola904b492004-03-04 13:07:52 +00002749 if (str != NULL)
2750 {
2751 str_copy_n (str, text + ind, filter_index);
2752 str[filter_index] = '\0';
2753 }
2754 }
2755 barray_destroy (&ba);
2756 }
2757 }
2758
2759 return str;
2760}
2761
2762typedef struct grammar_load_state_
2763{
2764 dict *di;
2765 byte *syntax_symbol;
2766 byte *string_symbol;
2767 map_str *maps;
2768 map_byte *mapb;
2769 map_rule *mapr;
2770} grammar_load_state;
2771
2772static void grammar_load_state_create (grammar_load_state **gr)
2773{
Brian Paulbdd15b52004-05-04 15:11:06 +00002774 *gr = (grammar_load_state *) mem_alloc (sizeof (grammar_load_state));
Michal Krola904b492004-03-04 13:07:52 +00002775 if (*gr)
2776 {
2777 (**gr).di = NULL;
2778 (**gr).syntax_symbol = NULL;
2779 (**gr).string_symbol = NULL;
2780 (**gr).maps = NULL;
2781 (**gr).mapb = NULL;
2782 (**gr).mapr = NULL;
2783 }
2784}
2785
2786static void grammar_load_state_destroy (grammar_load_state **gr)
2787{
2788 if (*gr)
2789 {
2790 dict_destroy (&(**gr).di);
2791 mem_free ((void **) &(**gr).syntax_symbol);
2792 mem_free ((void **) &(**gr).string_symbol);
2793 map_str_destroy (&(**gr).maps);
2794 map_byte_destroy (&(**gr).mapb);
2795 map_rule_destroy (&(**gr).mapr);
2796 mem_free ((void **) gr);
2797 }
2798}
2799
2800/*
2801 the API
2802*/
2803
2804grammar grammar_load_from_text (const byte *text)
2805{
2806 grammar_load_state *g = NULL;
2807 grammar id = 0;
2808
2809 clear_last_error ();
2810
2811 grammar_load_state_create (&g);
2812 if (g == NULL)
2813 return 0;
2814
2815 dict_create (&g->di);
2816 if (g->di == NULL)
2817 {
2818 grammar_load_state_destroy (&g);
2819 return 0;
2820 }
2821
2822 eat_spaces (&text);
2823
2824 /* skip ".syntax" keyword */
2825 text += 7;
2826 eat_spaces (&text);
2827
2828 /* retrieve root symbol */
2829 if (get_identifier (&text, &g->syntax_symbol))
2830 {
2831 grammar_load_state_destroy (&g);
2832 return 0;
2833 }
2834 eat_spaces (&text);
2835
2836 /* skip semicolon */
2837 text++;
2838 eat_spaces (&text);
2839
2840 while (*text)
2841 {
2842 byte *symbol = NULL;
2843 int is_dot = *text == '.';
2844
2845 if (is_dot)
2846 text++;
2847
2848 if (get_identifier (&text, &symbol))
2849 {
2850 grammar_load_state_destroy (&g);
2851 return 0;
2852 }
2853 eat_spaces (&text);
2854
2855 /* .emtcode */
2856 if (is_dot && str_equal (symbol, (byte *) "emtcode"))
2857 {
2858 map_byte *ma = NULL;
2859
Brian Paula6c423d2004-08-25 15:59:48 +00002860 mem_free ((void **) (void *) &symbol);
Michal Krola904b492004-03-04 13:07:52 +00002861
2862 if (get_emtcode (&text, &ma))
2863 {
2864 grammar_load_state_destroy (&g);
2865 return 0;
2866 }
2867
Michal Krol904ef742004-10-20 14:54:17 +00002868 map_byte_append (&g->mapb, ma);
Michal Krola904b492004-03-04 13:07:52 +00002869 }
2870 /* .regbyte */
2871 else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
2872 {
2873 map_byte *ma = NULL;
2874
Brian Paula6c423d2004-08-25 15:59:48 +00002875 mem_free ((void **) (void *) &symbol);
Michal Krola904b492004-03-04 13:07:52 +00002876
2877 if (get_regbyte (&text, &ma))
2878 {
2879 grammar_load_state_destroy (&g);
2880 return 0;
2881 }
2882
Michal Krol904ef742004-10-20 14:54:17 +00002883 map_byte_append (&g->di->m_regbytes, ma);
Michal Krola904b492004-03-04 13:07:52 +00002884 }
2885 /* .errtext */
2886 else if (is_dot && str_equal (symbol, (byte *) "errtext"))
2887 {
2888 map_str *ma = NULL;
2889
Brian Paula6c423d2004-08-25 15:59:48 +00002890 mem_free ((void **) (void *) &symbol);
Michal Krola904b492004-03-04 13:07:52 +00002891
2892 if (get_errtext (&text, &ma))
2893 {
2894 grammar_load_state_destroy (&g);
2895 return 0;
2896 }
2897
Michal Krol904ef742004-10-20 14:54:17 +00002898 map_str_append (&g->maps, ma);
Michal Krola904b492004-03-04 13:07:52 +00002899 }
2900 /* .string */
2901 else if (is_dot && str_equal (symbol, (byte *) "string"))
2902 {
Brian Paula6c423d2004-08-25 15:59:48 +00002903 mem_free ((void **) (void *) &symbol);
Michal Krola904b492004-03-04 13:07:52 +00002904
2905 if (g->di->m_string != NULL)
2906 {
2907 grammar_load_state_destroy (&g);
2908 return 0;
2909 }
2910
2911 if (get_identifier (&text, &g->string_symbol))
2912 {
2913 grammar_load_state_destroy (&g);
2914 return 0;
2915 }
2916
2917 /* skip semicolon */
2918 eat_spaces (&text);
2919 text++;
2920 eat_spaces (&text);
2921 }
2922 else
2923 {
2924 rule *ru = NULL;
2925 map_rule *ma = NULL;
2926
2927 if (get_rule (&text, &ru, g->maps, g->mapb))
2928 {
2929 grammar_load_state_destroy (&g);
2930 return 0;
2931 }
2932
Michal Krol904ef742004-10-20 14:54:17 +00002933 rule_append (&g->di->m_rulez, ru);
Michal Krola904b492004-03-04 13:07:52 +00002934
2935 /* if a rule consist of only one specifier, give it an ".and" operator */
2936 if (ru->m_oper == op_none)
2937 ru->m_oper = op_and;
2938
2939 map_rule_create (&ma);
2940 if (ma == NULL)
2941 {
2942 grammar_load_state_destroy (&g);
2943 return 0;
2944 }
2945
2946 ma->key = symbol;
2947 ma->data = ru;
Michal Krol904ef742004-10-20 14:54:17 +00002948 map_rule_append (&g->mapr, ma);
Michal Krola904b492004-03-04 13:07:52 +00002949 }
2950 }
2951
2952 if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
2953 g->di->m_regbytes))
2954 {
2955 grammar_load_state_destroy (&g);
2956 return 0;
2957 }
2958
Michal Krol904ef742004-10-20 14:54:17 +00002959 dict_append (&g_dicts, g->di);
Michal Krola904b492004-03-04 13:07:52 +00002960 id = g->di->m_id;
2961 g->di = NULL;
2962
2963 grammar_load_state_destroy (&g);
2964
2965 return id;
2966}
2967
2968int grammar_set_reg8 (grammar id, const byte *name, byte value)
2969{
2970 dict *di = NULL;
2971 map_byte *reg = NULL;
2972
2973 clear_last_error ();
2974
2975 dict_find (&g_dicts, id, &di);
2976 if (di == NULL)
2977 {
2978 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
2979 return 0;
2980 }
2981
2982 reg = map_byte_locate (&di->m_regbytes, name);
2983 if (reg == NULL)
2984 {
2985 set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
2986 return 0;
2987 }
2988
2989 reg->data = value;
2990 return 1;
2991}
2992
Michal Krol904ef742004-10-20 14:54:17 +00002993/*
2994 internal checking function used by both grammar_check and grammar_fast_check functions
2995*/
2996static int _grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size,
2997 unsigned int estimate_prod_size, int use_fast_path)
Michal Krola904b492004-03-04 13:07:52 +00002998{
2999 dict *di = NULL;
Michal Krola79d4e72006-10-19 08:07:00 +00003000 int index = 0;
Michal Krola904b492004-03-04 13:07:52 +00003001
3002 clear_last_error ();
3003
3004 dict_find (&g_dicts, id, &di);
3005 if (di == NULL)
3006 {
3007 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3008 return 0;
3009 }
3010
Michal Krola904b492004-03-04 13:07:52 +00003011 *prod = NULL;
3012 *size = 0;
3013
Michal Krol904ef742004-10-20 14:54:17 +00003014 if (use_fast_path)
Michal Krola904b492004-03-04 13:07:52 +00003015 {
Michal Krol904ef742004-10-20 14:54:17 +00003016 regbyte_ctx *rbc = NULL;
3017 bytepool *bp = NULL;
3018 int _P = 0;
3019
3020 bytepool_create (&bp, estimate_prod_size);
3021 if (bp == NULL)
3022 return 0;
3023
3024 if (fast_match (di, text, &index, di->m_syntax, &_P, bp, 0, &rbc) != mr_matched)
3025 {
3026 bytepool_destroy (&bp);
3027 free_regbyte_ctx_stack (rbc, NULL);
3028 return 0;
3029 }
3030
Michal Krola904b492004-03-04 13:07:52 +00003031 free_regbyte_ctx_stack (rbc, NULL);
Michal Krol904ef742004-10-20 14:54:17 +00003032
3033 *prod = bp->_F;
3034 *size = _P;
3035 bp->_F = NULL;
3036 bytepool_destroy (&bp);
Michal Krola904b492004-03-04 13:07:52 +00003037 }
Michal Krol904ef742004-10-20 14:54:17 +00003038 else
Michal Krola904b492004-03-04 13:07:52 +00003039 {
Michal Krol904ef742004-10-20 14:54:17 +00003040 regbyte_ctx *rbc = NULL;
3041 barray *ba = NULL;
Michal Krola904b492004-03-04 13:07:52 +00003042
Michal Krol904ef742004-10-20 14:54:17 +00003043 barray_create (&ba);
3044 if (ba == NULL)
3045 return 0;
3046
3047 if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
3048 {
3049 barray_destroy (&ba);
3050 free_regbyte_ctx_stack (rbc, NULL);
3051 return 0;
3052 }
3053
3054 free_regbyte_ctx_stack (rbc, NULL);
3055
3056 *prod = (byte *) mem_alloc (ba->len * sizeof (byte));
3057 if (*prod == NULL)
3058 {
3059 barray_destroy (&ba);
3060 return 0;
3061 }
3062
3063 mem_copy (*prod, ba->data, ba->len * sizeof (byte));
3064 *size = ba->len;
3065 barray_destroy (&ba);
3066 }
Michal Krola904b492004-03-04 13:07:52 +00003067
3068 return 1;
3069}
3070
Michal Krol904ef742004-10-20 14:54:17 +00003071int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)
3072{
3073 return _grammar_check (id, text, prod, size, 0, 0);
3074}
3075
3076int grammar_fast_check (grammar id, const byte *text, byte **prod, unsigned int *size,
3077 unsigned int estimate_prod_size)
3078{
3079 return _grammar_check (id, text, prod, size, estimate_prod_size, 1);
3080}
3081
Michal Krola904b492004-03-04 13:07:52 +00003082int grammar_destroy (grammar id)
3083{
3084 dict **di = &g_dicts;
3085
3086 clear_last_error ();
3087
3088 while (*di != NULL)
3089 {
3090 if ((**di).m_id == id)
3091 {
3092 dict *tmp = *di;
Michal Krol904ef742004-10-20 14:54:17 +00003093 *di = (**di).next;
Michal Krola904b492004-03-04 13:07:52 +00003094 dict_destroy (&tmp);
3095 return 1;
3096 }
3097
Michal Krol904ef742004-10-20 14:54:17 +00003098 di = &(**di).next;
Michal Krola904b492004-03-04 13:07:52 +00003099 }
3100
3101 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3102 return 0;
3103}
3104
Michal Krol904ef742004-10-20 14:54:17 +00003105static void append_character (const char x, byte *text, int *dots_made, int *len, int size)
3106{
3107 if (*dots_made == 0)
3108 {
3109 if (*len < size - 1)
3110 {
3111 text[(*len)++] = x;
3112 text[*len] = '\0';
3113 }
3114 else
3115 {
3116 int i;
3117 for (i = 0; i < 3; i++)
3118 if (--(*len) >= 0)
3119 text[*len] = '.';
3120 *dots_made = 1;
3121 }
3122 }
3123}
3124
Michal Krola904b492004-03-04 13:07:52 +00003125void grammar_get_last_error (byte *text, unsigned int size, int *pos)
3126{
Brian Paulbd997cd2004-07-20 21:12:56 +00003127 int len = 0, dots_made = 0;
Michal Krola904b492004-03-04 13:07:52 +00003128 const byte *p = error_message;
3129
3130 *text = '\0';
3131
Michal Krol904ef742004-10-20 14:54:17 +00003132 if (p)
3133 {
3134 while (*p)
3135 {
3136 if (*p == '$')
3137 {
Michal Krola904b492004-03-04 13:07:52 +00003138 const byte *r = error_param;
3139
Michal Krol904ef742004-10-20 14:54:17 +00003140 while (*r)
3141 {
3142 append_character (*r++, text, &dots_made, &len, (int) size);
Michal Krola904b492004-03-04 13:07:52 +00003143 }
Michal Krol904ef742004-10-20 14:54:17 +00003144
Michal Krola904b492004-03-04 13:07:52 +00003145 p++;
3146 }
Michal Krol904ef742004-10-20 14:54:17 +00003147 else
3148 {
3149 append_character (*p++, text, &dots_made, &len, size);
Michal Krola904b492004-03-04 13:07:52 +00003150 }
Michal Krol904ef742004-10-20 14:54:17 +00003151 }
Brian Paul289ffee2004-10-02 15:56:50 +00003152 }
Michal Krol904ef742004-10-20 14:54:17 +00003153
Michal Krola904b492004-03-04 13:07:52 +00003154 *pos = error_position;
Michal Krola904b492004-03-04 13:07:52 +00003155}