blob: a9775961d3a6577654cc072c8f3e423847e578e6 [file] [log] [blame]
Michal Krola904b492004-03-04 13:07:52 +00001/*
2 * Mesa 3-D graphics library
Michal Krola79d4e72006-10-19 08:07:00 +00003 * Version: 6.6
Michal Krola904b492004-03-04 13:07:52 +00004 *
Michal Krola79d4e72006-10-19 08:07:00 +00005 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
Michal Krola904b492004-03-04 13:07:52 +00006 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
26 * \file grammar.c
27 * syntax parsing engine
28 * \author Michal Krol
29 */
30
31#ifndef GRAMMAR_PORT_BUILD
32#error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
33#endif
34
35/*
Michal Krola904b492004-03-04 13:07:52 +000036*/
37
38/*
39 INTRODUCTION
40 ------------
41
42 The task is to check the syntax of an input string. Input string is a stream of ASCII
43 characters terminated with a null-character ('\0'). Checking it using C language is
44 difficult and hard to implement without bugs. It is hard to maintain and make changes when
45 the syntax changes.
46
47 This is because of a high redundancy of the C code. Large blocks of code are duplicated with
48 only small changes. Even use of macros does not solve the problem because macros cannot
49 erase the complexity of the problem.
50
51 The resolution is to create a new language that will be highly oriented to our task. Once
52 we describe a particular syntax, we are done. We can then focus on the code that implements
53 the language. Its size and complexity are relatively small compared to the code that directly
54 checks the syntax.
55
56 First, we must implement our new language. Here, the language is implemented in C, but it
57 could also be implemented in any other language. The code is listed below. We must take
58 a good care that it is bug free. This is simple because the code is simple and clean.
59
60 Next, we must describe the syntax of our new language in itself. Once created and checked
61 manually that it is correct, we can use it to check other scripts.
62
63 Note that our new language loading code does not have to check the syntax. It is because we
64 assume that the script describing itself is correct, and other scripts can be syntactically
65 checked by the former script. The loading code must only do semantic checking which leads us to
66 simple resolving references.
67
68 THE LANGUAGE
69 ------------
70
71 Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
72 sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
73 which is an identifier, and its definition. A definition is in turn a sequence of specifiers
74 connected with ".and" or ".or" operator. These operators cannot be mixed together in one
75 definition. Specifier can be a symbol, string, character, character range or a special
76 keyword ".true" or ".false".
77
78 At the very beginning of the script there is a declaration of a root symbol, which is in the form:
79 .syntax <root_symbol>;
80 The <root_symbol> must be one of the symbols in declaration sequence. The syntax is correct if
81 the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
82 the symbol evaluates to true. Definition evaluation depends on the operator used to connect
83 specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
84 only if all the specifiers evaluate to true. If ".or" operator is used, definition evalutes to
85 true if any of the specifiers evaluates to true. If definition contains only one specifier,
86 it is evaluated as if it was connected with ".true" keyword by ".and" operator.
87
88 If specifier is a ".true" keyword, it always evaluates to true.
89
90 If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
91 when it does not evaluate to true.
92
93 Character range specifier is in the form:
94 '<first_character>' - '<second_character>'
95 If specifier is a character range, it evaluates to true if character in the stream is greater
96 or equal to <first_character> and less or equal to <second_character>. In that situation
97 the stream pointer is advanced to point to next character in the stream. All C-style escape
98 sequences are supported although trigraph sequences are not. The comparisons are performed
99 on 8-bit unsigned integers.
100
101 Character specifier is in the form:
102 '<single_character>'
103 It evaluates to true if the following character range specifier evaluates to true:
104 '<single_character>' - '<single_character>'
105
106 String specifier is in the form:
107 "<string>"
108 Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
109 <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
110 the following character specifier evaluates to true:
111 '<string>[i]'
112 If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
113
114 Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
115 .loop <symbol> (1)
116 where <symbol> is defined as follows:
117 <symbol> <definition>; (2)
118 Construction (1) is replaced by the following code:
119 <symbol$1>
120 and declaration (2) is replaced by the following:
121 <symbol$1> <symbol$2> .or .true;
122 <symbol$2> <symbol> .and <symbol$1>;
123 <symbol> <definition>;
124
125 Synek also supports a register mechanism. The user can, in the SYN file, declare a number of
126 registers that can be accessed in the syn body. Each reg has its name and a default value.
127 The register is one byte wide. The C code can change the default value by calling
128 grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
129 a sequence of specifiers joined with .and or .or operator. And now each specifier can be
130 prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
131 where <operator> can be == or !=. If the condition evaluates to false, the specifier
132 evaluates to .false. Otherwise it evaluates to the specifier.
133
134 ESCAPE SEQUENCES
135 ----------------
136
137 Synek supports all escape sequences in character specifiers. The mapping table is listed below.
138 All occurrences of the characters in the first column are replaced with the corresponding
139 character in the second column.
140
141 Escape sequence Represents
142 ------------------------------------------------------------------------------------------------
143 \a Bell (alert)
144 \b Backspace
145 \f Formfeed
146 \n New line
147 \r Carriage return
148 \t Horizontal tab
149 \v Vertical tab
150 \' Single quotation mark
151 \" Double quotation mark
152 \\ Backslash
153 \? Literal question mark
154 \ooo ASCII character in octal notation
155 \xhhh ASCII character in hexadecimal notation
156 ------------------------------------------------------------------------------------------------
157
158 RAISING ERRORS
159 --------------
160
161 Any specifier can be followed by a special construction that is executed when the specifier
162 evaluates to false. The construction is in the form:
163 .error <ERROR_TEXT>
164 <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
165 in the form:
166 .errtext <ERROR_TEXT> "<error_desc>"
167 When specifier evaluates to false and this construction is present, parsing is stopped
168 immediately and <error_desc> is returned as a result of parsing. The error position is also
169 returned and it is meant as an offset from the beginning of the stream to the character that
170 was valid so far. Example:
171
172 (**** syntax script ****)
173
174 .syntax program;
175 .errtext MISSING_SEMICOLON "missing ';'"
176 program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
177 .loop space .and '\0';
178 declaration "declare" .and .loop space .and identifier;
179 space ' ';
180
181 (**** sample code ****)
182
183 declare foo ,
184
185 In the example above checking the sample code will result in error message "missing ';'" and
186 error position 12. The sample code is not correct. Note the presence of '\0' specifier to
187 assure that there is no code after semicolon - only spaces.
188 <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
189 the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
190 the identifier name. The starting position is the error position. The length of the resulting
191 string is the position after invoking the symbol.
192
193 PRODUCTION
194 ----------
195
196 Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
197 that evaluate to true. That is, every specifier and optional error construction can be followed
198 by a number of emit constructions that are in the form:
199 .emit <parameter>
200 <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
201 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
202 in the form:
203 .emtcode <identifier> <hex_number>
204
205 When given specifier evaluates to true, all emits associated with the specifier are output
206 in order they were declared. A star means that last-read character should be output instead
207 of constant value. Example:
208
209 (**** syntax script ****)
210
211 .syntax foobar;
212 .emtcode WORD_FOO 0x01
213 .emtcode WORD_BAR 0x02
214 foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
215 FOO "foo" .and SPACE;
216 BAR "bar" .and SPACE;
217 SPACE ' ' .or '\0';
218
219 (**** sample text 1 ****)
220
221 foo
222
223 (**** sample text 2 ****)
224
225 foobar
226
227 For both samples the result will be one-element array. For first sample text it will be
228 value 1, for second - 0. Note that every text will be accepted because of presence of
229 .true as an alternative.
230
231 Another example:
232
233 (**** syntax script ****)
234
235 .syntax declaration;
236 .emtcode VARIABLE 0x01
237 declaration "declare" .and .loop space .and
238 identifier .emit VARIABLE .and (1)
239 .true .emit 0x00 .and (2)
240 .loop space .and ';';
241 space ' ' .or '\t';
242 identifier .loop id_char .emit *; (3)
243 id_char 'a'-'z' .or 'A'-'Z' .or '_';
244
245 (**** sample code ****)
246
247 declare fubar;
248
249 In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
250 true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
251 to terminate the string with null to signal when the string ends. Specifier (3) outputs
252 all characters that make declared identifier. The result of sample code will be the
253 following array:
254 { 1, 'f', 'u', 'b', 'a', 'r', 0 }
255
256 If .emit is followed by dollar $, it means that current position should be output. Current
257 position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
258 first character consumed by the specifier associated with the .emit instruction. Current
259 position is stored in the output buffer in Little-Endian convention (the lowest byte comes
260 first).
261*/
262
Briandceae282007-01-19 12:02:31 -0700263#include <stdio.h>
264
Michal Krola904b492004-03-04 13:07:52 +0000265static void mem_free (void **);
266
267/*
268 internal error messages
269*/
270static const byte *OUT_OF_MEMORY = (byte *) "internal error 1001: out of physical memory";
271static const byte *UNRESOLVED_REFERENCE = (byte *) "internal error 1002: unresolved reference '$'";
272static const byte *INVALID_GRAMMAR_ID = (byte *) "internal error 1003: invalid grammar object";
273static const byte *INVALID_REGISTER_NAME = (byte *) "internal error 1004: invalid register name: '$'";
Brian Paul8e8fa632005-07-01 02:03:33 +0000274/*static const byte *DUPLICATE_IDENTIFIER = (byte *) "internal error 1005: identifier '$' already defined";*/
Michal Krol904ef742004-10-20 14:54:17 +0000275static const byte *UNREFERENCED_IDENTIFIER =(byte *) "internal error 1006: unreferenced identifier '$'";
Michal Krola904b492004-03-04 13:07:52 +0000276
Michal Krol904ef742004-10-20 14:54:17 +0000277static const byte *error_message = NULL; /* points to one of the error messages above */
Michal Krola904b492004-03-04 13:07:52 +0000278static byte *error_param = NULL; /* this is inserted into error_message in place of $ */
279static int error_position = -1;
280
281static byte *unknown = (byte *) "???";
282
Alan Hourihanec63f3cf2004-12-08 14:00:46 +0000283static void clear_last_error (void)
Michal Krola904b492004-03-04 13:07:52 +0000284{
285 /* reset error message */
286 error_message = NULL;
287
288 /* free error parameter - if error_param is a "???" don't free it - it's static */
289 if (error_param != unknown)
Brian Paula6c423d2004-08-25 15:59:48 +0000290 mem_free ((void **) (void *) &error_param);
Michal Krola904b492004-03-04 13:07:52 +0000291 else
292 error_param = NULL;
293
294 /* reset error position */
295 error_position = -1;
296}
297
298static void set_last_error (const byte *msg, byte *param, int pos)
299{
Michal Krol904ef742004-10-20 14:54:17 +0000300 /* error message can be set only once */
Michal Krola904b492004-03-04 13:07:52 +0000301 if (error_message != NULL)
302 {
Brian Paula6c423d2004-08-25 15:59:48 +0000303 mem_free ((void **) (void *) &param);
Michal Krola904b492004-03-04 13:07:52 +0000304 return;
305 }
306
307 error_message = msg;
308
Michal Krol904ef742004-10-20 14:54:17 +0000309 /* if param is NULL, set error_param to unknown ("???") */
310 /* note: do not try to strdup the "???" - it may be that we are here because of */
311 /* out of memory error so strdup can fail */
Michal Krola904b492004-03-04 13:07:52 +0000312 if (param != NULL)
313 error_param = param;
314 else
315 error_param = unknown;
316
317 error_position = pos;
318}
319
320/*
321 memory management routines
322*/
323static void *mem_alloc (size_t size)
324{
325 void *ptr = grammar_alloc_malloc (size);
326 if (ptr == NULL)
327 set_last_error (OUT_OF_MEMORY, NULL, -1);
328 return ptr;
329}
330
/*
   forwards to grammar_memory_copy() and hands back whatever it returns
*/
static void *mem_copy (void *dst, const void *src, size_t size)
{
   void *result = grammar_memory_copy (dst, src, size);

   return result;
}
335
/*
   releases the block *ptr with grammar_alloc_free() and resets *ptr to NULL,
   so the caller's variable does not keep pointing at freed memory
*/
static void mem_free (void **ptr)
{
   void *block = *ptr;

   grammar_alloc_free (block);
   *ptr = NULL;
}
341
342static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)
343{
344 void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size);
345 if (ptr2 == NULL)
346 set_last_error (OUT_OF_MEMORY, NULL, -1);
347 return ptr2;
348}
349
350static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)
351{
352 return grammar_string_copy_n (dst, src, max_len);
353}
354
355static byte *str_duplicate (const byte *str)
356{
357 byte *new_str = grammar_string_duplicate (str);
358 if (new_str == NULL)
359 set_last_error (OUT_OF_MEMORY, NULL, -1);
360 return new_str;
361}
362
363static int str_equal (const byte *str1, const byte *str2)
364{
365 return grammar_string_compare (str1, str2) == 0;
366}
367
368static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)
369{
370 return grammar_string_compare_n (str1, str2, n) == 0;
371}
372
Michal Krola79d4e72006-10-19 08:07:00 +0000373static int
374str_length (const byte *str)
Michal Krola904b492004-03-04 13:07:52 +0000375{
Michal Krola79d4e72006-10-19 08:07:00 +0000376 return (int) (grammar_string_length (str));
Michal Krola904b492004-03-04 13:07:52 +0000377}
378
379/*
Michal Krol904ef742004-10-20 14:54:17 +0000380 useful macros
381*/
/*
   defines a static function <node_type>_append (x, nx) that walks the singly linked
   list rooted at *x through the "next" members and stores nx in the first NULL link,
   i.e. attaches nx at the end (or makes it the head of an empty list)
*/
#define GRAMMAR_IMPLEMENT_LIST_APPEND(node_type)\
   static void node_type##_append (node_type **x, node_type *nx)\
   {\
      node_type **link = x;\
      while (*link != NULL)\
         link = &(*link)->next;\
      *link = nx;\
   }
387
388/*
Michal Krola904b492004-03-04 13:07:52 +0000389 string to byte map typedef
390*/
391typedef struct map_byte_
392{
393 byte *key;
394 byte data;
395 struct map_byte_ *next;
396} map_byte;
397
398static void map_byte_create (map_byte **ma)
399{
Brian Paulbdd15b52004-05-04 15:11:06 +0000400 *ma = (map_byte *) mem_alloc (sizeof (map_byte));
Michal Krola904b492004-03-04 13:07:52 +0000401 if (*ma)
402 {
403 (**ma).key = NULL;
404 (**ma).data = '\0';
405 (**ma).next = NULL;
406 }
407}
408
Michal Krola904b492004-03-04 13:07:52 +0000409static void map_byte_destroy (map_byte **ma)
410{
411 if (*ma)
412 {
413 map_byte_destroy (&(**ma).next);
414 mem_free ((void **) &(**ma).key);
415 mem_free ((void **) ma);
416 }
417}
418
Michal Krol904ef742004-10-20 14:54:17 +0000419GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte)
Michal Krola904b492004-03-04 13:07:52 +0000420
421/*
422 searches the map for the specified key,
423 returns pointer to the element with the specified key if it exists
424 returns NULL otherwise
425*/
Brian Paul788461f2004-03-29 14:53:49 +0000426static map_byte *map_byte_locate (map_byte **ma, const byte *key)
Michal Krola904b492004-03-04 13:07:52 +0000427{
428 while (*ma)
429 {
430 if (str_equal ((**ma).key, key))
431 return *ma;
432
433 ma = &(**ma).next;
434 }
435
436 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
437 return NULL;
438}
439
440/*
441 searches the map for specified key,
442 if the key is matched, *data is filled with data associated with the key,
443 returns 0 if the key is matched,
444 returns 1 otherwise
445*/
446static int map_byte_find (map_byte **ma, const byte *key, byte *data)
447{
448 map_byte *found = map_byte_locate (ma, key);
449 if (found != NULL)
450 {
451 *data = found->data;
452
453 return 0;
454 }
455
456 return 1;
457}
458
459/*
460 regbyte context typedef
461
462 Each regbyte consists of its name and a default value. These are static and created at
463 grammar script compile-time, for example the following line:
464 .regbyte vertex_blend 0x00
465 adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
466 When the script is executed, this regbyte can be accessed by name for read and write. When a
467 particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
468 stack. The new entry contains information abot which regbyte it references and its new value.
469 When a given regbyte is accessed for read, the stack is searched top-down to find an
470 entry that references the regbyte. The first matching entry is used to return the current
471 value it holds. If no entry is found, the default value is returned.
472*/
473typedef struct regbyte_ctx_
474{
475 map_byte *m_regbyte;
476 byte m_current_value;
477 struct regbyte_ctx_ *m_prev;
478} regbyte_ctx;
479
480static void regbyte_ctx_create (regbyte_ctx **re)
481{
Brian Paulbdd15b52004-05-04 15:11:06 +0000482 *re = (regbyte_ctx *) mem_alloc (sizeof (regbyte_ctx));
Michal Krola904b492004-03-04 13:07:52 +0000483 if (*re)
484 {
485 (**re).m_regbyte = NULL;
486 (**re).m_prev = NULL;
487 }
488}
489
490static void regbyte_ctx_destroy (regbyte_ctx **re)
491{
492 if (*re)
493 {
494 mem_free ((void **) re);
495 }
496}
497
498static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)
499{
500 /* first lookup in the register stack */
501 while (*re != NULL)
502 {
503 if ((**re).m_regbyte == reg)
504 return (**re).m_current_value;
505
506 re = &(**re).m_prev;
507 }
508
509 /* if not found - return the default value */
510 return reg->data;
511}
512
/*
   emit type typedef - what value an emit produces
*/
enum emit_type_
{
   et_byte = 0,         /* explicit number */
   et_stream = 1,       /* eaten character */
   et_position = 2      /* current position */
};

typedef enum emit_type_ emit_type;
522
/*
   emit destination typedef - where an emit stores its value
*/
enum emit_dest_
{
   ed_output = 0,       /* write to the output buffer */
   ed_regbyte = 1       /* write a particular regbyte */
};

typedef enum emit_dest_ emit_dest;
531
532/*
533 emit typedef
534*/
535typedef struct emit_
536{
537 emit_dest m_emit_dest;
538 emit_type m_emit_type; /* ed_output */
539 byte m_byte; /* et_byte */
540 map_byte *m_regbyte; /* ed_regbyte */
541 byte *m_regname; /* ed_regbyte - temporary */
542 struct emit_ *m_next;
543} emit;
544
545static void emit_create (emit **em)
546{
Brian Paulbdd15b52004-05-04 15:11:06 +0000547 *em = (emit *) mem_alloc (sizeof (emit));
Michal Krola904b492004-03-04 13:07:52 +0000548 if (*em)
549 {
550 (**em).m_emit_dest = ed_output;
551 (**em).m_emit_type = et_byte;
552 (**em).m_byte = '\0';
553 (**em).m_regbyte = NULL;
554 (**em).m_regname = NULL;
555 (**em).m_next = NULL;
556 }
557}
558
559static void emit_destroy (emit **em)
560{
561 if (*em)
562 {
563 emit_destroy (&(**em).m_next);
564 mem_free ((void **) &(**em).m_regname);
565 mem_free ((void **) em);
566 }
567}
568
Michal Krol904ef742004-10-20 14:54:17 +0000569static unsigned int emit_size (emit *_E)
570{
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000571 unsigned int n = 0;
Michal Krol904ef742004-10-20 14:54:17 +0000572
573 while (_E != NULL)
574 {
575 if (_E->m_emit_dest == ed_output)
576 {
577 if (_E->m_emit_type == et_position)
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000578 n += 4; /* position is a 32-bit unsigned integer */
Michal Krol904ef742004-10-20 14:54:17 +0000579 else
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000580 n++;
Michal Krol904ef742004-10-20 14:54:17 +0000581 }
582 _E = _E->m_next;
583 }
584
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000585 return n;
Michal Krol904ef742004-10-20 14:54:17 +0000586}
587
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000588static int emit_push (emit *_E, byte *_P, byte c, unsigned int _Pos, regbyte_ctx **_Ctx)
Michal Krol904ef742004-10-20 14:54:17 +0000589{
590 while (_E != NULL)
591 {
592 if (_E->m_emit_dest == ed_output)
593 {
594 if (_E->m_emit_type == et_byte)
595 *_P++ = _E->m_byte;
596 else if (_E->m_emit_type == et_stream)
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000597 *_P++ = c;
Michal Krol904ef742004-10-20 14:54:17 +0000598 else /* _Em->type == et_position */
599 {
600 *_P++ = (byte) (_Pos);
601 *_P++ = (byte) (_Pos >> 8);
602 *_P++ = (byte) (_Pos >> 16);
603 *_P++ = (byte) (_Pos >> 24);
604 }
605 }
606 else
607 {
608 regbyte_ctx *new_rbc;
609 regbyte_ctx_create (&new_rbc);
610 if (new_rbc == NULL)
611 return 1;
612
613 new_rbc->m_prev = *_Ctx;
614 new_rbc->m_regbyte = _E->m_regbyte;
615 *_Ctx = new_rbc;
616
617 if (_E->m_emit_type == et_byte)
618 new_rbc->m_current_value = _E->m_byte;
619 else if (_E->m_emit_type == et_stream)
Ian Romanick9f23a3a2005-07-28 00:11:10 +0000620 new_rbc->m_current_value = c;
Michal Krol904ef742004-10-20 14:54:17 +0000621 }
622
623 _E = _E->m_next;
624 }
625
626 return 0;
627}
628
Michal Krola904b492004-03-04 13:07:52 +0000629/*
630 error typedef
631*/
632typedef struct error_
633{
634 byte *m_text;
635 byte *m_token_name;
636 struct rule_ *m_token;
637} error;
638
639static void error_create (error **er)
640{
Brian Paulbdd15b52004-05-04 15:11:06 +0000641 *er = (error *) mem_alloc (sizeof (error));
Michal Krola904b492004-03-04 13:07:52 +0000642 if (*er)
643 {
644 (**er).m_text = NULL;
645 (**er).m_token_name = NULL;
646 (**er).m_token = NULL;
647 }
648}
649
650static void error_destroy (error **er)
651{
652 if (*er)
653 {
654 mem_free ((void **) &(**er).m_text);
655 mem_free ((void **) &(**er).m_token_name);
656 mem_free ((void **) er);
657 }
658}
659
660struct dict_;
Michal Krola79d4e72006-10-19 08:07:00 +0000661
662static byte *
663error_get_token (error *, struct dict_ *, const byte *, int);
Michal Krola904b492004-03-04 13:07:52 +0000664
/*
   condition operand type typedef
*/
enum cond_oper_type_
{
   cot_byte = 0,        /* constant 8-bit unsigned integer */
   cot_regbyte = 1      /* pointer to byte register containing the current value */
};

typedef enum cond_oper_type_ cond_oper_type;
673
674/*
675 condition operand typedef
676*/
677typedef struct cond_oper_
678{
679 cond_oper_type m_type;
680 byte m_byte; /* cot_byte */
681 map_byte *m_regbyte; /* cot_regbyte */
682 byte *m_regname; /* cot_regbyte - temporary */
683} cond_oper;
684
/*
   condition type typedef - the comparison a condition performs
*/
enum cond_type_
{
   ct_equal = 0,
   ct_not_equal = 1
};

typedef enum cond_type_ cond_type;
693
694/*
695 condition typedef
696*/
697typedef struct cond_
698{
699 cond_type m_type;
700 cond_oper m_operands[2];
701} cond;
702
703static void cond_create (cond **co)
704{
Brian Paulbdd15b52004-05-04 15:11:06 +0000705 *co = (cond *) mem_alloc (sizeof (cond));
Michal Krola904b492004-03-04 13:07:52 +0000706 if (*co)
707 {
708 (**co).m_operands[0].m_regname = NULL;
709 (**co).m_operands[1].m_regname = NULL;
710 }
711}
712
713static void cond_destroy (cond **co)
714{
715 if (*co)
716 {
717 mem_free ((void **) &(**co).m_operands[0].m_regname);
718 mem_free ((void **) &(**co).m_operands[1].m_regname);
719 mem_free ((void **) co);
720 }
721}
722
/*
   specifier type typedef
*/
enum spec_type_
{
   st_false = 0,
   st_true = 1,
   st_byte = 2,
   st_byte_range = 3,
   st_string = 4,
   st_identifier = 5,
   st_identifier_loop = 6,
   st_debug = 7
};

typedef enum spec_type_ spec_type;
737
738/*
739 specifier typedef
740*/
741typedef struct spec_
742{
743 spec_type m_spec_type;
744 byte m_byte[2]; /* st_byte, st_byte_range */
745 byte *m_string; /* st_string */
746 struct rule_ *m_rule; /* st_identifier, st_identifier_loop */
747 emit *m_emits;
748 error *m_errtext;
749 cond *m_cond;
Michal Krol904ef742004-10-20 14:54:17 +0000750 struct spec_ *next;
Michal Krola904b492004-03-04 13:07:52 +0000751} spec;
752
753static void spec_create (spec **sp)
754{
Brian Paulbdd15b52004-05-04 15:11:06 +0000755 *sp = (spec *) mem_alloc (sizeof (spec));
Michal Krola904b492004-03-04 13:07:52 +0000756 if (*sp)
757 {
758 (**sp).m_spec_type = st_false;
759 (**sp).m_byte[0] = '\0';
760 (**sp).m_byte[1] = '\0';
761 (**sp).m_string = NULL;
762 (**sp).m_rule = NULL;
763 (**sp).m_emits = NULL;
764 (**sp).m_errtext = NULL;
765 (**sp).m_cond = NULL;
Michal Krol904ef742004-10-20 14:54:17 +0000766 (**sp).next = NULL;
Michal Krola904b492004-03-04 13:07:52 +0000767 }
768}
769
770static void spec_destroy (spec **sp)
771{
772 if (*sp)
773 {
Michal Krol904ef742004-10-20 14:54:17 +0000774 spec_destroy (&(**sp).next);
Michal Krola904b492004-03-04 13:07:52 +0000775 emit_destroy (&(**sp).m_emits);
776 error_destroy (&(**sp).m_errtext);
777 mem_free ((void **) &(**sp).m_string);
778 cond_destroy (&(**sp).m_cond);
779 mem_free ((void **) sp);
780 }
781}
782
Michal Krol904ef742004-10-20 14:54:17 +0000783GRAMMAR_IMPLEMENT_LIST_APPEND(spec)
Michal Krola904b492004-03-04 13:07:52 +0000784
/*
   operator typedef - how the specifiers of a rule are connected
*/
enum oper_
{
   op_none = 0,
   op_and = 1,
   op_or = 2
};

typedef enum oper_ oper;
794
795/*
796 rule typedef
797*/
798typedef struct rule_
799{
800 oper m_oper;
801 spec *m_specs;
Michal Krol904ef742004-10-20 14:54:17 +0000802 struct rule_ *next;
803 int m_referenced;
Michal Krola904b492004-03-04 13:07:52 +0000804} rule;
805
806static void rule_create (rule **ru)
807{
Brian Paulbdd15b52004-05-04 15:11:06 +0000808 *ru = (rule *) mem_alloc (sizeof (rule));
Michal Krola904b492004-03-04 13:07:52 +0000809 if (*ru)
810 {
811 (**ru).m_oper = op_none;
812 (**ru).m_specs = NULL;
Michal Krol904ef742004-10-20 14:54:17 +0000813 (**ru).next = NULL;
814 (**ru).m_referenced = 0;
Michal Krola904b492004-03-04 13:07:52 +0000815 }
816}
817
818static void rule_destroy (rule **ru)
819{
820 if (*ru)
821 {
Michal Krol904ef742004-10-20 14:54:17 +0000822 rule_destroy (&(**ru).next);
Michal Krola904b492004-03-04 13:07:52 +0000823 spec_destroy (&(**ru).m_specs);
824 mem_free ((void **) ru);
825 }
826}
827
Michal Krol904ef742004-10-20 14:54:17 +0000828GRAMMAR_IMPLEMENT_LIST_APPEND(rule)
Michal Krola904b492004-03-04 13:07:52 +0000829
830/*
831 returns unique grammar id
832*/
Alan Hourihanec63f3cf2004-12-08 14:00:46 +0000833static grammar next_valid_grammar_id (void)
Michal Krola904b492004-03-04 13:07:52 +0000834{
835 static grammar id = 0;
836
837 return ++id;
838}
839
840/*
841 dictionary typedef
842*/
843typedef struct dict_
844{
845 rule *m_rulez;
846 rule *m_syntax;
847 rule *m_string;
848 map_byte *m_regbytes;
849 grammar m_id;
Michal Krol904ef742004-10-20 14:54:17 +0000850 struct dict_ *next;
Michal Krola904b492004-03-04 13:07:52 +0000851} dict;
852
853static void dict_create (dict **di)
854{
Brian Paulbdd15b52004-05-04 15:11:06 +0000855 *di = (dict *) mem_alloc (sizeof (dict));
Michal Krola904b492004-03-04 13:07:52 +0000856 if (*di)
857 {
858 (**di).m_rulez = NULL;
859 (**di).m_syntax = NULL;
860 (**di).m_string = NULL;
861 (**di).m_regbytes = NULL;
862 (**di).m_id = next_valid_grammar_id ();
Michal Krol904ef742004-10-20 14:54:17 +0000863 (**di).next = NULL;
Michal Krola904b492004-03-04 13:07:52 +0000864 }
865}
866
867static void dict_destroy (dict **di)
868{
869 if (*di)
870 {
871 rule_destroy (&(**di).m_rulez);
872 map_byte_destroy (&(**di).m_regbytes);
873 mem_free ((void **) di);
874 }
875}
876
Michal Krol904ef742004-10-20 14:54:17 +0000877GRAMMAR_IMPLEMENT_LIST_APPEND(dict)
Michal Krola904b492004-03-04 13:07:52 +0000878
879static void dict_find (dict **di, grammar key, dict **data)
880{
881 while (*di)
882 {
883 if ((**di).m_id == key)
884 {
885 *data = *di;
886 return;
887 }
888
Michal Krol904ef742004-10-20 14:54:17 +0000889 di = &(**di).next;
Michal Krola904b492004-03-04 13:07:52 +0000890 }
891
892 *data = NULL;
893}
894
895static dict *g_dicts = NULL;
896
897/*
898 byte array typedef
Michal Krola904b492004-03-04 13:07:52 +0000899*/
900typedef struct barray_
901{
902 byte *data;
903 unsigned int len;
904} barray;
905
906static void barray_create (barray **ba)
907{
Brian Paulbdd15b52004-05-04 15:11:06 +0000908 *ba = (barray *) mem_alloc (sizeof (barray));
Michal Krola904b492004-03-04 13:07:52 +0000909 if (*ba)
910 {
911 (**ba).data = NULL;
912 (**ba).len = 0;
913 }
914}
915
916static void barray_destroy (barray **ba)
917{
918 if (*ba)
919 {
920 mem_free ((void **) &(**ba).data);
921 mem_free ((void **) ba);
922 }
923}
924
925/*
926 reallocates byte array to requested size,
927 returns 0 on success,
928 returns 1 otherwise
929*/
930static int barray_resize (barray **ba, unsigned int nlen)
931{
932 byte *new_pointer;
933
934 if (nlen == 0)
935 {
936 mem_free ((void **) &(**ba).data);
937 (**ba).data = NULL;
938 (**ba).len = 0;
939
940 return 0;
941 }
942 else
943 {
Michal Krol904ef742004-10-20 14:54:17 +0000944 new_pointer = (byte *) mem_realloc ((**ba).data, (**ba).len * sizeof (byte),
945 nlen * sizeof (byte));
Michal Krola904b492004-03-04 13:07:52 +0000946 if (new_pointer)
947 {
948 (**ba).data = new_pointer;
949 (**ba).len = nlen;
950
951 return 0;
952 }
953 }
954
955 return 1;
956}
957
958/*
959 adds byte array pointed by *nb to the end of array pointed by *ba,
960 returns 0 on success,
961 returns 1 otherwise
962*/
963static int barray_append (barray **ba, barray **nb)
964{
965 const unsigned int len = (**ba).len;
966
967 if (barray_resize (ba, (**ba).len + (**nb).len))
968 return 1;
969
970 mem_copy ((**ba).data + len, (**nb).data, (**nb).len);
971
972 return 0;
973}
974
975/*
976 adds emit chain pointed by em to the end of array pointed by *ba,
977 returns 0 on success,
978 returns 1 otherwise
979*/
980static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)
981{
Michal Krol904ef742004-10-20 14:54:17 +0000982 unsigned int count = emit_size (em);
Michal Krola904b492004-03-04 13:07:52 +0000983
984 if (barray_resize (ba, (**ba).len + count))
985 return 1;
986
Michal Krol904ef742004-10-20 14:54:17 +0000987 return emit_push (em, (**ba).data + ((**ba).len - count), c, pos, rbc);
988}
989
990/*
991 byte pool typedef
992*/
993typedef struct bytepool_
994{
995 byte *_F;
996 unsigned int _Siz;
997} bytepool;
998
999static void bytepool_destroy (bytepool **by)
1000{
1001 if (*by != NULL)
Michal Krola904b492004-03-04 13:07:52 +00001002 {
Michal Krol904ef742004-10-20 14:54:17 +00001003 mem_free ((void **) &(**by)._F);
1004 mem_free ((void **) by);
1005 }
1006}
Michal Krola904b492004-03-04 13:07:52 +00001007
Michal Krol904ef742004-10-20 14:54:17 +00001008static void bytepool_create (bytepool **by, int len)
1009{
1010 *by = (bytepool *) (mem_alloc (sizeof (bytepool)));
1011 if (*by != NULL)
1012 {
1013 (**by)._F = (byte *) (mem_alloc (sizeof (byte) * len));
1014 (**by)._Siz = len;
Michal Krola904b492004-03-04 13:07:52 +00001015
Michal Krol904ef742004-10-20 14:54:17 +00001016 if ((**by)._F == NULL)
1017 bytepool_destroy (by);
1018 }
1019}
Michal Krola904b492004-03-04 13:07:52 +00001020
Ian Romanick9f23a3a2005-07-28 00:11:10 +00001021static int bytepool_reserve (bytepool *by, unsigned int n)
Michal Krol904ef742004-10-20 14:54:17 +00001022{
1023 byte *_P;
1024
Ian Romanick9f23a3a2005-07-28 00:11:10 +00001025 if (n <= by->_Siz)
Michal Krol904ef742004-10-20 14:54:17 +00001026 return 0;
1027
1028 /* byte pool can only grow and at least by doubling its size */
Ian Romanick9f23a3a2005-07-28 00:11:10 +00001029 n = n >= by->_Siz * 2 ? n : by->_Siz * 2;
Michal Krol904ef742004-10-20 14:54:17 +00001030
1031 /* reallocate the memory and adjust pointers to the new memory location */
Ian Romanick9f23a3a2005-07-28 00:11:10 +00001032 _P = (byte *) (mem_realloc (by->_F, sizeof (byte) * by->_Siz, sizeof (byte) * n));
Michal Krol904ef742004-10-20 14:54:17 +00001033 if (_P != NULL)
1034 {
1035 by->_F = _P;
Ian Romanick9f23a3a2005-07-28 00:11:10 +00001036 by->_Siz = n;
Michal Krol904ef742004-10-20 14:54:17 +00001037 return 0;
Michal Krola904b492004-03-04 13:07:52 +00001038 }
1039
Michal Krol904ef742004-10-20 14:54:17 +00001040 return 1;
Michal Krola904b492004-03-04 13:07:52 +00001041}
1042
1043/*
1044 string to string map typedef
1045*/
1046typedef struct map_str_
1047{
1048 byte *key;
1049 byte *data;
1050 struct map_str_ *next;
1051} map_str;
1052
1053static void map_str_create (map_str **ma)
1054{
Brian Paulbdd15b52004-05-04 15:11:06 +00001055 *ma = (map_str *) mem_alloc (sizeof (map_str));
Michal Krola904b492004-03-04 13:07:52 +00001056 if (*ma)
1057 {
1058 (**ma).key = NULL;
1059 (**ma).data = NULL;
1060 (**ma).next = NULL;
1061 }
1062}
1063
1064static void map_str_destroy (map_str **ma)
1065{
1066 if (*ma)
1067 {
1068 map_str_destroy (&(**ma).next);
1069 mem_free ((void **) &(**ma).key);
1070 mem_free ((void **) &(**ma).data);
1071 mem_free ((void **) ma);
1072 }
1073}
1074
/* presumably expands to the list-append helper for map_str (map_str_append) -
   the macro is defined outside this part of the file, confirm there */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_str)
Michal Krola904b492004-03-04 13:07:52 +00001076
1077/*
1078 searches the map for specified key,
1079 if the key is matched, *data is filled with data associated with the key,
1080 returns 0 if the key is matched,
1081 returns 1 otherwise
1082*/
1083static int map_str_find (map_str **ma, const byte *key, byte **data)
1084{
1085 while (*ma)
1086 {
1087 if (str_equal ((**ma).key, key))
1088 {
1089 *data = str_duplicate ((**ma).data);
1090 if (*data == NULL)
1091 return 1;
1092
1093 return 0;
1094 }
1095
1096 ma = &(**ma).next;
1097 }
1098
1099 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1100 return 1;
1101}
1102
1103/*
1104 string to rule map typedef
1105*/
1106typedef struct map_rule_
1107{
1108 byte *key;
1109 rule *data;
1110 struct map_rule_ *next;
1111} map_rule;
1112
1113static void map_rule_create (map_rule **ma)
1114{
Brian Paulbdd15b52004-05-04 15:11:06 +00001115 *ma = (map_rule *) mem_alloc (sizeof (map_rule));
Michal Krola904b492004-03-04 13:07:52 +00001116 if (*ma)
1117 {
1118 (**ma).key = NULL;
1119 (**ma).data = NULL;
1120 (**ma).next = NULL;
1121 }
1122}
1123
1124static void map_rule_destroy (map_rule **ma)
1125{
1126 if (*ma)
1127 {
1128 map_rule_destroy (&(**ma).next);
1129 mem_free ((void **) &(**ma).key);
1130 mem_free ((void **) ma);
1131 }
1132}
1133
/* presumably expands to the list-append helper for map_rule (map_rule_append) -
   the macro is defined outside this part of the file, confirm there */
GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule)
Michal Krola904b492004-03-04 13:07:52 +00001135
1136/*
1137 searches the map for specified key,
1138 if the key is matched, *data is filled with data associated with the key,
1139 returns 0 if the is matched,
1140 returns 1 otherwise
1141*/
1142static int map_rule_find (map_rule **ma, const byte *key, rule **data)
1143{
1144 while (*ma)
1145 {
1146 if (str_equal ((**ma).key, key))
1147 {
1148 *data = (**ma).data;
1149
1150 return 0;
1151 }
1152
1153 ma = &(**ma).next;
1154 }
1155
1156 set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);
1157 return 1;
1158}
1159
1160/*
1161 returns 1 if given character is a white space,
1162 returns 0 otherwise
1163*/
1164static int is_space (byte c)
1165{
1166 return c == ' ' || c == '\t' || c == '\n' || c == '\r';
1167}
1168
1169/*
1170 advances text pointer by 1 if character pointed by *text is a space,
1171 returns 1 if a space has been eaten,
1172 returns 0 otherwise
1173*/
1174static int eat_space (const byte **text)
1175{
1176 if (is_space (**text))
1177 {
1178 (*text)++;
1179
1180 return 1;
1181 }
1182
1183 return 0;
1184}
1185
1186/*
Brian Paul788461f2004-03-29 14:53:49 +00001187 returns 1 if text points to C-style comment start string,
Michal Krola904b492004-03-04 13:07:52 +00001188 returns 0 otherwise
1189*/
1190static int is_comment_start (const byte *text)
1191{
1192 return text[0] == '/' && text[1] == '*';
1193}
1194
1195/*
1196 advances text pointer to first character after C-style comment block - if any,
1197 returns 1 if C-style comment block has been encountered and eaten,
1198 returns 0 otherwise
1199*/
1200static int eat_comment (const byte **text)
1201{
1202 if (is_comment_start (*text))
1203 {
1204 /* *text points to comment block - skip two characters to enter comment body */
1205 *text += 2;
1206 /* skip any character except consecutive '*' and '/' */
1207 while (!((*text)[0] == '*' && (*text)[1] == '/'))
1208 (*text)++;
1209 /* skip those two terminating characters */
1210 *text += 2;
1211
1212 return 1;
1213 }
1214
1215 return 0;
1216}
1217
1218/*
1219 advances text pointer to first character that is neither space nor C-style comment block
1220*/
1221static void eat_spaces (const byte **text)
1222{
1223 while (eat_space (text) || eat_comment (text))
1224 ;
1225}
1226
1227/*
1228 resizes string pointed by *ptr to successfully add character c to the end of the string,
1229 returns 0 on success,
1230 returns 1 otherwise
1231*/
1232static int string_grow (byte **ptr, unsigned int *len, byte c)
1233{
1234 /* reallocate the string in 16-byte increments */
1235 if ((*len & 0x0F) == 0x0F || *ptr == NULL)
1236 {
Brian Paulbdd15b52004-05-04 15:11:06 +00001237 byte *tmp = (byte *) mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte),
Michal Krola904b492004-03-04 13:07:52 +00001238 ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte));
1239 if (tmp == NULL)
1240 return 1;
1241
1242 *ptr = tmp;
1243 }
1244
1245 if (c)
1246 {
1247 /* append given character */
1248 (*ptr)[*len] = c;
1249 (*len)++;
1250 }
1251 (*ptr)[*len] = '\0';
1252
1253 return 0;
1254}
1255
1256/*
1257 returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1258 returns 0 otherwise
1259*/
1260static int is_identifier (byte c)
1261{
1262 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
1263}
1264
1265/*
1266 copies characters from *text to *id until non-identifier character is encountered,
1267 assumes that *id points to NULL object - caller is responsible for later freeing the string,
1268 text pointer is advanced to point past the copied identifier,
1269 returns 0 if identifier was successfully copied,
1270 returns 1 otherwise
1271*/
1272static int get_identifier (const byte **text, byte **id)
1273{
1274 const byte *t = *text;
1275 byte *p = NULL;
1276 unsigned int len = 0;
1277
1278 if (string_grow (&p, &len, '\0'))
1279 return 1;
1280
1281 /* loop while next character in buffer is valid for identifiers */
1282 while (is_identifier (*t))
1283 {
1284 if (string_grow (&p, &len, *t++))
1285 {
Brian Paula6c423d2004-08-25 15:59:48 +00001286 mem_free ((void **) (void *) &p);
Michal Krola904b492004-03-04 13:07:52 +00001287 return 1;
1288 }
1289 }
1290
1291 *text = t;
1292 *id = p;
1293
1294 return 0;
1295}
1296
1297/*
Michal Krol904ef742004-10-20 14:54:17 +00001298 converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
1299 advances text pointer past the converted sequence,
1300 returns the converted value
1301*/
1302static unsigned int dec_convert (const byte **text)
1303{
1304 unsigned int value = 0;
1305
1306 while (**text >= '0' && **text <= '9')
1307 {
1308 value = value * 10 + **text - '0';
1309 (*text)++;
1310 }
1311
1312 return value;
1313}
1314
1315/*
Michal Krola904b492004-03-04 13:07:52 +00001316 returns 1 if given character is HEX digit 0-9, A-F or a-f,
1317 returns 0 otherwise
1318*/
1319static int is_hex (byte c)
1320{
1321 return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
1322}
1323
1324/*
1325 returns value of passed character as if it was HEX digit
1326*/
1327static unsigned int hex2dec (byte c)
1328{
1329 if (c >= '0' && c <= '9')
1330 return c - '0';
1331 if (c >= 'A' && c <= 'F')
1332 return c - 'A' + 10;
1333 return c - 'a' + 10;
1334}
1335
1336/*
1337 converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1338 advances text pointer past the converted sequence,
1339 returns the converted value
1340*/
1341static unsigned int hex_convert (const byte **text)
1342{
1343 unsigned int value = 0;
1344
1345 while (is_hex (**text))
1346 {
1347 value = value * 0x10 + hex2dec (**text);
1348 (*text)++;
1349 }
1350
1351 return value;
1352}
1353
1354/*
1355 returns 1 if given character is OCT digit 0-7,
1356 returns 0 otherwise
1357*/
1358static int is_oct (byte c)
1359{
1360 return c >= '0' && c <= '7';
1361}
1362
1363/*
1364 returns value of passed character as if it was OCT digit
1365*/
1366static int oct2dec (byte c)
1367{
1368 return c - '0';
1369}
1370
1371static byte get_escape_sequence (const byte **text)
1372{
1373 int value = 0;
1374
1375 /* skip '\' character */
1376 (*text)++;
1377
1378 switch (*(*text)++)
1379 {
1380 case '\'':
1381 return '\'';
1382 case '"':
1383 return '\"';
1384 case '?':
1385 return '\?';
1386 case '\\':
1387 return '\\';
1388 case 'a':
1389 return '\a';
1390 case 'b':
1391 return '\b';
1392 case 'f':
1393 return '\f';
1394 case 'n':
1395 return '\n';
1396 case 'r':
1397 return '\r';
1398 case 't':
1399 return '\t';
1400 case 'v':
1401 return '\v';
1402 case 'x':
1403 return (byte) hex_convert (text);
1404 }
1405
1406 (*text)--;
1407 if (is_oct (**text))
1408 {
1409 value = oct2dec (*(*text)++);
1410 if (is_oct (**text))
1411 {
1412 value = value * 010 + oct2dec (*(*text)++);
1413 if (is_oct (**text))
1414 value = value * 010 + oct2dec (*(*text)++);
1415 }
1416 }
1417
1418 return (byte) value;
1419}
1420
1421/*
1422 copies characters from *text to *str until " or ' character is encountered,
1423 assumes that *str points to NULL object - caller is responsible for later freeing the string,
1424 assumes that *text points to " or ' character that starts the string,
1425 text pointer is advanced to point past the " or ' character,
1426 returns 0 if string was successfully copied,
1427 returns 1 otherwise
1428*/
1429static int get_string (const byte **text, byte **str)
1430{
1431 const byte *t = *text;
1432 byte *p = NULL;
1433 unsigned int len = 0;
1434 byte term_char;
1435
1436 if (string_grow (&p, &len, '\0'))
1437 return 1;
1438
1439 /* read " or ' character that starts the string */
1440 term_char = *t++;
1441 /* while next character is not the terminating character */
1442 while (*t && *t != term_char)
1443 {
1444 byte c;
1445
1446 if (*t == '\\')
1447 c = get_escape_sequence (&t);
1448 else
1449 c = *t++;
1450
1451 if (string_grow (&p, &len, c))
1452 {
Brian Paula6c423d2004-08-25 15:59:48 +00001453 mem_free ((void **) (void *) &p);
Michal Krola904b492004-03-04 13:07:52 +00001454 return 1;
1455 }
1456 }
1457 /* skip " or ' character that ends the string */
1458 t++;
1459
1460 *text = t;
1461 *str = p;
1462 return 0;
1463}
1464
1465/*
Michal Krol904ef742004-10-20 14:54:17 +00001466 gets emit code, the syntax is:
1467 ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
Michal Krola904b492004-03-04 13:07:52 +00001468 assumes that *text already points to <symbol>,
1469 returns 0 if emit code is successfully read,
1470 returns 1 otherwise
1471*/
1472static int get_emtcode (const byte **text, map_byte **ma)
1473{
1474 const byte *t = *text;
1475 map_byte *m = NULL;
1476
1477 map_byte_create (&m);
1478 if (m == NULL)
1479 return 1;
1480
1481 if (get_identifier (&t, &m->key))
1482 {
1483 map_byte_destroy (&m);
1484 return 1;
1485 }
1486 eat_spaces (&t);
1487
1488 if (*t == '\'')
1489 {
1490 byte *c;
1491
1492 if (get_string (&t, &c))
1493 {
1494 map_byte_destroy (&m);
1495 return 1;
1496 }
1497
1498 m->data = (byte) c[0];
Brian Paula6c423d2004-08-25 15:59:48 +00001499 mem_free ((void **) (void *) &c);
Michal Krola904b492004-03-04 13:07:52 +00001500 }
Michal Krol904ef742004-10-20 14:54:17 +00001501 else if (t[0] == '0' && (t[1] == 'x' || t[1] == 'X'))
Michal Krola904b492004-03-04 13:07:52 +00001502 {
1503 /* skip HEX "0x" or "0X" prefix */
1504 t += 2;
1505 m->data = (byte) hex_convert (&t);
1506 }
Michal Krol904ef742004-10-20 14:54:17 +00001507 else
1508 {
1509 m->data = (byte) dec_convert (&t);
1510 }
Michal Krola904b492004-03-04 13:07:52 +00001511
1512 eat_spaces (&t);
1513
1514 *text = t;
1515 *ma = m;
1516 return 0;
1517}
1518
1519/*
Michal Krol904ef742004-10-20 14:54:17 +00001520 gets regbyte declaration, the syntax is:
1521 ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
Michal Krola904b492004-03-04 13:07:52 +00001522 assumes that *text already points to <symbol>,
1523 returns 0 if regbyte is successfully read,
1524 returns 1 otherwise
1525*/
1526static int get_regbyte (const byte **text, map_byte **ma)
1527{
Michal Krol904ef742004-10-20 14:54:17 +00001528 /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
Michal Krola904b492004-03-04 13:07:52 +00001529 return get_emtcode (text, ma);
1530}
1531
1532/*
1533 returns 0 on success,
1534 returns 1 otherwise
1535*/
1536static int get_errtext (const byte **text, map_str **ma)
1537{
1538 const byte *t = *text;
1539 map_str *m = NULL;
1540
1541 map_str_create (&m);
1542 if (m == NULL)
1543 return 1;
1544
1545 if (get_identifier (&t, &m->key))
1546 {
1547 map_str_destroy (&m);
1548 return 1;
1549 }
1550 eat_spaces (&t);
1551
1552 if (get_string (&t, &m->data))
1553 {
1554 map_str_destroy (&m);
1555 return 1;
1556 }
1557 eat_spaces (&t);
1558
1559 *text = t;
1560 *ma = m;
1561 return 0;
1562}
1563
1564/*
1565 returns 0 on success,
1566 returns 1 otherwise,
1567*/
1568static int get_error (const byte **text, error **er, map_str *maps)
1569{
1570 const byte *t = *text;
1571 byte *temp = NULL;
1572
1573 if (*t != '.')
1574 return 0;
1575
1576 t++;
1577 if (get_identifier (&t, &temp))
1578 return 1;
1579 eat_spaces (&t);
1580
1581 if (!str_equal ((byte *) "error", temp))
1582 {
Brian Paula6c423d2004-08-25 15:59:48 +00001583 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001584 return 0;
1585 }
1586
Brian Paula6c423d2004-08-25 15:59:48 +00001587 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001588
1589 error_create (er);
1590 if (*er == NULL)
1591 return 1;
1592
1593 if (*t == '\"')
1594 {
1595 if (get_string (&t, &(**er).m_text))
1596 {
1597 error_destroy (er);
1598 return 1;
1599 }
1600 eat_spaces (&t);
1601 }
1602 else
1603 {
1604 if (get_identifier (&t, &temp))
1605 {
1606 error_destroy (er);
1607 return 1;
1608 }
1609 eat_spaces (&t);
1610
1611 if (map_str_find (&maps, temp, &(**er).m_text))
1612 {
Brian Paula6c423d2004-08-25 15:59:48 +00001613 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001614 error_destroy (er);
1615 return 1;
1616 }
1617
Brian Paula6c423d2004-08-25 15:59:48 +00001618 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001619 }
1620
1621 /* try to extract "token" from "...$token$..." */
1622 {
1623 byte *processed = NULL;
Michal Krola79d4e72006-10-19 08:07:00 +00001624 unsigned int len = 0;
1625 int i = 0;
Michal Krola904b492004-03-04 13:07:52 +00001626
1627 if (string_grow (&processed, &len, '\0'))
1628 {
1629 error_destroy (er);
1630 return 1;
1631 }
1632
1633 while (i < str_length ((**er).m_text))
1634 {
1635 /* check if the dollar sign is repeated - if so skip it */
1636 if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$')
1637 {
1638 if (string_grow (&processed, &len, '$'))
1639 {
Brian Paula6c423d2004-08-25 15:59:48 +00001640 mem_free ((void **) (void *) &processed);
Michal Krola904b492004-03-04 13:07:52 +00001641 error_destroy (er);
1642 return 1;
1643 }
1644
1645 i += 2;
1646 }
1647 else if ((**er).m_text[i] != '$')
1648 {
1649 if (string_grow (&processed, &len, (**er).m_text[i]))
1650 {
Brian Paula6c423d2004-08-25 15:59:48 +00001651 mem_free ((void **) (void *) &processed);
Michal Krola904b492004-03-04 13:07:52 +00001652 error_destroy (er);
1653 return 1;
1654 }
1655
1656 i++;
1657 }
1658 else
1659 {
1660 if (string_grow (&processed, &len, '$'))
1661 {
Brian Paula6c423d2004-08-25 15:59:48 +00001662 mem_free ((void **) (void *) &processed);
Michal Krola904b492004-03-04 13:07:52 +00001663 error_destroy (er);
1664 return 1;
1665 }
1666
1667 {
1668 /* length of token being extracted */
1669 unsigned int tlen = 0;
1670
1671 if (string_grow (&(**er).m_token_name, &tlen, '\0'))
1672 {
Brian Paula6c423d2004-08-25 15:59:48 +00001673 mem_free ((void **) (void *) &processed);
Michal Krola904b492004-03-04 13:07:52 +00001674 error_destroy (er);
1675 return 1;
1676 }
1677
1678 /* skip the dollar sign */
1679 i++;
1680
1681 while ((**er).m_text[i] != '$')
1682 {
1683 if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i]))
1684 {
Brian Paula6c423d2004-08-25 15:59:48 +00001685 mem_free ((void **) (void *) &processed);
Michal Krola904b492004-03-04 13:07:52 +00001686 error_destroy (er);
1687 return 1;
1688 }
1689
1690 i++;
1691 }
1692
1693 /* skip the dollar sign */
1694 i++;
1695 }
1696 }
1697 }
1698
1699 mem_free ((void **) &(**er).m_text);
1700 (**er).m_text = processed;
1701 }
1702
1703 *text = t;
1704 return 0;
1705}
1706
1707/*
1708 returns 0 on success,
1709 returns 1 otherwise,
1710*/
1711static int get_emits (const byte **text, emit **em, map_byte *mapb)
1712{
1713 const byte *t = *text;
1714 byte *temp = NULL;
1715 emit *e = NULL;
1716 emit_dest dest;
1717
1718 if (*t != '.')
1719 return 0;
1720
1721 t++;
1722 if (get_identifier (&t, &temp))
1723 return 1;
1724 eat_spaces (&t);
1725
1726 /* .emit */
1727 if (str_equal ((byte *) "emit", temp))
1728 dest = ed_output;
1729 /* .load */
1730 else if (str_equal ((byte *) "load", temp))
1731 dest = ed_regbyte;
1732 else
1733 {
Brian Paula6c423d2004-08-25 15:59:48 +00001734 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001735 return 0;
1736 }
1737
Brian Paula6c423d2004-08-25 15:59:48 +00001738 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001739
1740 emit_create (&e);
1741 if (e == NULL)
1742 return 1;
1743
1744 e->m_emit_dest = dest;
1745
1746 if (dest == ed_regbyte)
1747 {
1748 if (get_identifier (&t, &e->m_regname))
1749 {
1750 emit_destroy (&e);
1751 return 1;
1752 }
1753 eat_spaces (&t);
1754 }
1755
1756 /* 0xNN */
Michal Krol904ef742004-10-20 14:54:17 +00001757 if (*t == '0' && (t[1] == 'x' || t[1] == 'X'))
Michal Krola904b492004-03-04 13:07:52 +00001758 {
1759 t += 2;
1760 e->m_byte = (byte) hex_convert (&t);
1761
1762 e->m_emit_type = et_byte;
1763 }
Michal Krol904ef742004-10-20 14:54:17 +00001764 /* NNN */
1765 else if (*t >= '0' && *t <= '9')
1766 {
1767 e->m_byte = (byte) dec_convert (&t);
1768
1769 e->m_emit_type = et_byte;
1770 }
Michal Krola904b492004-03-04 13:07:52 +00001771 /* * */
1772 else if (*t == '*')
1773 {
1774 t++;
1775
1776 e->m_emit_type = et_stream;
1777 }
1778 /* $ */
1779 else if (*t == '$')
1780 {
1781 t++;
1782
1783 e->m_emit_type = et_position;
1784 }
1785 /* 'c' */
1786 else if (*t == '\'')
1787 {
1788 if (get_string (&t, &temp))
1789 {
1790 emit_destroy (&e);
1791 return 1;
1792 }
1793 e->m_byte = (byte) temp[0];
1794
Brian Paula6c423d2004-08-25 15:59:48 +00001795 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001796
1797 e->m_emit_type = et_byte;
1798 }
1799 else
1800 {
1801 if (get_identifier (&t, &temp))
1802 {
1803 emit_destroy (&e);
1804 return 1;
1805 }
1806
1807 if (map_byte_find (&mapb, temp, &e->m_byte))
1808 {
Brian Paula6c423d2004-08-25 15:59:48 +00001809 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001810 emit_destroy (&e);
1811 return 1;
1812 }
1813
Brian Paula6c423d2004-08-25 15:59:48 +00001814 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001815
1816 e->m_emit_type = et_byte;
1817 }
1818
1819 eat_spaces (&t);
1820
1821 if (get_emits (&t, &e->m_next, mapb))
1822 {
1823 emit_destroy (&e);
1824 return 1;
1825 }
1826
1827 *text = t;
1828 *em = e;
1829 return 0;
1830}
1831
1832/*
1833 returns 0 on success,
1834 returns 1 otherwise,
1835*/
1836static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)
1837{
1838 const byte *t = *text;
1839 spec *s = NULL;
1840
1841 spec_create (&s);
1842 if (s == NULL)
1843 return 1;
1844
1845 /* first - read optional .if statement */
1846 if (*t == '.')
1847 {
1848 const byte *u = t;
1849 byte *keyword = NULL;
1850
1851 /* skip the dot */
1852 u++;
1853
1854 if (get_identifier (&u, &keyword))
1855 {
1856 spec_destroy (&s);
1857 return 1;
1858 }
1859
1860 /* .if */
1861 if (str_equal ((byte *) "if", keyword))
1862 {
1863 cond_create (&s->m_cond);
1864 if (s->m_cond == NULL)
1865 {
1866 spec_destroy (&s);
1867 return 1;
1868 }
1869
1870 /* skip the left paren */
1871 eat_spaces (&u);
1872 u++;
1873
1874 /* get the left operand */
1875 eat_spaces (&u);
1876 if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))
1877 {
1878 spec_destroy (&s);
1879 return 1;
1880 }
1881 s->m_cond->m_operands[0].m_type = cot_regbyte;
1882
1883 /* get the operator (!= or ==) */
1884 eat_spaces (&u);
1885 if (*u == '!')
1886 s->m_cond->m_type = ct_not_equal;
1887 else
1888 s->m_cond->m_type = ct_equal;
1889 u += 2;
Michal Krola904b492004-03-04 13:07:52 +00001890 eat_spaces (&u);
Michal Krola904b492004-03-04 13:07:52 +00001891
Michal Krol904ef742004-10-20 14:54:17 +00001892 if (u[0] == '0' && (u[1] == 'x' || u[1] == 'X'))
1893 {
1894 /* skip the 0x prefix */
1895 u += 2;
1896
1897 /* get the right operand */
1898 s->m_cond->m_operands[1].m_byte = hex_convert (&u);
1899 s->m_cond->m_operands[1].m_type = cot_byte;
1900 }
1901 else /*if (*u >= '0' && *u <= '9')*/
1902 {
1903 /* get the right operand */
1904 s->m_cond->m_operands[1].m_byte = dec_convert (&u);
1905 s->m_cond->m_operands[1].m_type = cot_byte;
1906 }
Michal Krola904b492004-03-04 13:07:52 +00001907
1908 /* skip the right paren */
1909 eat_spaces (&u);
1910 u++;
1911
1912 eat_spaces (&u);
1913
1914 t = u;
1915 }
1916
Brian Paula6c423d2004-08-25 15:59:48 +00001917 mem_free ((void **) (void *) &keyword);
Michal Krola904b492004-03-04 13:07:52 +00001918 }
1919
1920 if (*t == '\'')
1921 {
1922 byte *temp = NULL;
1923
1924 if (get_string (&t, &temp))
1925 {
1926 spec_destroy (&s);
1927 return 1;
1928 }
1929 eat_spaces (&t);
1930
1931 if (*t == '-')
1932 {
1933 byte *temp2 = NULL;
1934
1935 /* skip the '-' character */
1936 t++;
1937 eat_spaces (&t);
1938
1939 if (get_string (&t, &temp2))
1940 {
Brian Paula6c423d2004-08-25 15:59:48 +00001941 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001942 spec_destroy (&s);
1943 return 1;
1944 }
1945 eat_spaces (&t);
1946
1947 s->m_spec_type = st_byte_range;
1948 s->m_byte[0] = *temp;
1949 s->m_byte[1] = *temp2;
1950
Brian Paula6c423d2004-08-25 15:59:48 +00001951 mem_free ((void **) (void *) &temp2);
Michal Krola904b492004-03-04 13:07:52 +00001952 }
1953 else
1954 {
1955 s->m_spec_type = st_byte;
1956 *s->m_byte = *temp;
1957 }
1958
Brian Paula6c423d2004-08-25 15:59:48 +00001959 mem_free ((void **) (void *) &temp);
Michal Krola904b492004-03-04 13:07:52 +00001960 }
1961 else if (*t == '"')
1962 {
1963 if (get_string (&t, &s->m_string))
1964 {
1965 spec_destroy (&s);
1966 return 1;
1967 }
1968 eat_spaces (&t);
1969
1970 s->m_spec_type = st_string;
1971 }
1972 else if (*t == '.')
1973 {
1974 byte *keyword = NULL;
1975
1976 /* skip the dot */
1977 t++;
1978
1979 if (get_identifier (&t, &keyword))
1980 {
1981 spec_destroy (&s);
1982 return 1;
1983 }
1984 eat_spaces (&t);
1985
1986 /* .true */
1987 if (str_equal ((byte *) "true", keyword))
1988 {
1989 s->m_spec_type = st_true;
1990 }
1991 /* .false */
1992 else if (str_equal ((byte *) "false", keyword))
1993 {
1994 s->m_spec_type = st_false;
1995 }
1996 /* .debug */
1997 else if (str_equal ((byte *) "debug", keyword))
1998 {
1999 s->m_spec_type = st_debug;
2000 }
2001 /* .loop */
2002 else if (str_equal ((byte *) "loop", keyword))
2003 {
2004 if (get_identifier (&t, &s->m_string))
2005 {
Brian Paula6c423d2004-08-25 15:59:48 +00002006 mem_free ((void **) (void *) &keyword);
Michal Krola904b492004-03-04 13:07:52 +00002007 spec_destroy (&s);
2008 return 1;
2009 }
2010 eat_spaces (&t);
2011
2012 s->m_spec_type = st_identifier_loop;
2013 }
Brian Paula6c423d2004-08-25 15:59:48 +00002014 mem_free ((void **) (void *) &keyword);
Michal Krola904b492004-03-04 13:07:52 +00002015 }
2016 else
2017 {
2018 if (get_identifier (&t, &s->m_string))
2019 {
2020 spec_destroy (&s);
2021 return 1;
2022 }
2023 eat_spaces (&t);
2024
2025 s->m_spec_type = st_identifier;
2026 }
2027
2028 if (get_error (&t, &s->m_errtext, maps))
2029 {
2030 spec_destroy (&s);
2031 return 1;
2032 }
2033
2034 if (get_emits (&t, &s->m_emits, mapb))
2035 {
2036 spec_destroy (&s);
2037 return 1;
2038 }
2039
2040 *text = t;
2041 *sp = s;
2042 return 0;
2043}
2044
2045/*
2046 returns 0 on success,
2047 returns 1 otherwise,
2048*/
2049static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)
2050{
2051 const byte *t = *text;
2052 rule *r = NULL;
2053
2054 rule_create (&r);
2055 if (r == NULL)
2056 return 1;
2057
2058 if (get_spec (&t, &r->m_specs, maps, mapb))
2059 {
2060 rule_destroy (&r);
2061 return 1;
2062 }
2063
2064 while (*t != ';')
2065 {
2066 byte *op = NULL;
2067 spec *sp = NULL;
2068
2069 /* skip the dot that precedes "and" or "or" */
2070 t++;
2071
2072 /* read "and" or "or" keyword */
2073 if (get_identifier (&t, &op))
2074 {
2075 rule_destroy (&r);
2076 return 1;
2077 }
2078 eat_spaces (&t);
2079
2080 if (r->m_oper == op_none)
2081 {
2082 /* .and */
2083 if (str_equal ((byte *) "and", op))
2084 r->m_oper = op_and;
2085 /* .or */
2086 else
2087 r->m_oper = op_or;
2088 }
2089
Brian Paula6c423d2004-08-25 15:59:48 +00002090 mem_free ((void **) (void *) &op);
Michal Krola904b492004-03-04 13:07:52 +00002091
2092 if (get_spec (&t, &sp, maps, mapb))
2093 {
2094 rule_destroy (&r);
2095 return 1;
2096 }
2097
Michal Krol904ef742004-10-20 14:54:17 +00002098 spec_append (&r->m_specs, sp);
Michal Krola904b492004-03-04 13:07:52 +00002099 }
2100
2101 /* skip the semicolon */
2102 t++;
2103 eat_spaces (&t);
2104
2105 *text = t;
2106 *ru = r;
2107 return 0;
2108}
2109
2110/*
2111 returns 0 on success,
2112 returns 1 otherwise,
2113*/
2114static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)
2115{
2116 if (map_rule_find (&mapr, symbol, ru))
2117 return 1;
2118
Michal Krol904ef742004-10-20 14:54:17 +00002119 (**ru).m_referenced = 1;
Michal Krola904b492004-03-04 13:07:52 +00002120
2121 return 0;
2122}
2123
2124/*
2125 returns 0 on success,
2126 returns 1 otherwise,
2127*/
2128static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,
2129 byte **string_symbol, map_byte *regbytes)
2130{
2131 rule *rulez = di->m_rulez;
2132
2133 /* update dependecies for the root and lexer symbols */
2134 if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||
2135 (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))
2136 return 1;
2137
2138 mem_free ((void **) syntax_symbol);
2139 mem_free ((void **) string_symbol);
2140
2141 /* update dependecies for the rest of the rules */
2142 while (rulez)
2143 {
2144 spec *sp = rulez->m_specs;
2145
2146 /* iterate through all the specifiers */
2147 while (sp)
2148 {
2149 /* update dependency for identifier */
2150 if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)
2151 {
2152 if (update_dependency (mapr, sp->m_string, &sp->m_rule))
2153 return 1;
2154
2155 mem_free ((void **) &sp->m_string);
2156 }
2157
2158 /* some errtexts reference to a rule */
2159 if (sp->m_errtext && sp->m_errtext->m_token_name)
2160 {
2161 if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))
2162 return 1;
2163
2164 mem_free ((void **) &sp->m_errtext->m_token_name);
2165 }
2166
2167 /* update dependency for condition */
2168 if (sp->m_cond)
2169 {
2170 int i;
2171 for (i = 0; i < 2; i++)
2172 if (sp->m_cond->m_operands[i].m_type == cot_regbyte)
2173 {
2174 sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,
2175 sp->m_cond->m_operands[i].m_regname);
2176
2177 if (sp->m_cond->m_operands[i].m_regbyte == NULL)
2178 return 1;
2179
2180 mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);
2181 }
2182 }
2183
2184 /* update dependency for all .load instructions */
2185 if (sp->m_emits)
2186 {
2187 emit *em = sp->m_emits;
2188 while (em != NULL)
2189 {
2190 if (em->m_emit_dest == ed_regbyte)
2191 {
2192 em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);
2193
2194 if (em->m_regbyte == NULL)
2195 return 1;
2196
2197 mem_free ((void **) &em->m_regname);
2198 }
2199
2200 em = em->m_next;
2201 }
2202 }
2203
Michal Krol904ef742004-10-20 14:54:17 +00002204 sp = sp->next;
Michal Krola904b492004-03-04 13:07:52 +00002205 }
2206
Michal Krol904ef742004-10-20 14:54:17 +00002207 rulez = rulez->next;
Michal Krola904b492004-03-04 13:07:52 +00002208 }
2209
Michal Krol904ef742004-10-20 14:54:17 +00002210 /* check for unreferenced symbols */
2211 rulez = di->m_rulez;
2212 while (rulez != NULL)
Michal Krola904b492004-03-04 13:07:52 +00002213 {
Michal Krol904ef742004-10-20 14:54:17 +00002214 if (!rulez->m_referenced)
Michal Krola904b492004-03-04 13:07:52 +00002215 {
Michal Krol904ef742004-10-20 14:54:17 +00002216 map_rule *ma = mapr;
Michal Krola904b492004-03-04 13:07:52 +00002217 while (ma)
2218 {
Michal Krol904ef742004-10-20 14:54:17 +00002219 if (ma->data == rulez)
Michal Krola904b492004-03-04 13:07:52 +00002220 {
Michal Krol904ef742004-10-20 14:54:17 +00002221 set_last_error (UNREFERENCED_IDENTIFIER, str_duplicate (ma->key), -1);
2222 return 1;
Michal Krola904b492004-03-04 13:07:52 +00002223 }
2224 ma = ma->next;
2225 }
2226 }
Michal Krol904ef742004-10-20 14:54:17 +00002227 rulez = rulez->next;
Michal Krola904b492004-03-04 13:07:52 +00002228 }
Michal Krol904ef742004-10-20 14:54:17 +00002229
Michal Krola904b492004-03-04 13:07:52 +00002230 return 0;
2231}
2232
2233static int satisfies_condition (cond *co, regbyte_ctx *ctx)
2234{
2235 byte values[2];
2236 int i;
2237
2238 if (co == NULL)
2239 return 1;
2240
2241 for (i = 0; i < 2; i++)
2242 switch (co->m_operands[i].m_type)
2243 {
2244 case cot_byte:
2245 values[i] = co->m_operands[i].m_byte;
2246 break;
2247 case cot_regbyte:
2248 values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte);
2249 break;
2250 }
2251
2252 switch (co->m_type)
2253 {
2254 case ct_equal:
2255 return values[0] == values[1];
2256 case ct_not_equal:
2257 return values[0] != values[1];
2258 }
2259
2260 return 0;
2261}
2262
2263static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)
2264{
2265 while (top != limit)
2266 {
2267 regbyte_ctx *rbc = top->m_prev;
2268 regbyte_ctx_destroy (&top);
2269 top = rbc;
2270 }
2271}
2272
/* Outcome of a match()/fast_match() call. */
typedef enum match_result_
{
    mr_not_matched    = 0,  /* the examined string does not match */
    mr_matched        = 1,  /* the examined string matches */
    mr_error_raised   = 2,  /* mr_not_matched + error has been raised */
    mr_dont_emit      = 3,  /* used by identifier loops only */
    mr_internal_error = 4   /* an internal error has occurred such as out of memory */
} match_result;
2281
2282/*
Michal Krola79d4e72006-10-19 08:07:00 +00002283 * This function does the main job. It parses the text and generates output data.
2284 */
2285static match_result
2286match (dict *di, const byte *text, int *index, rule *ru, barray **ba, int filtering_string,
2287 regbyte_ctx **rbc)
Michal Krola904b492004-03-04 13:07:52 +00002288{
Michal Krola79d4e72006-10-19 08:07:00 +00002289 int ind = *index;
Michal Krola904b492004-03-04 13:07:52 +00002290 match_result status = mr_not_matched;
2291 spec *sp = ru->m_specs;
2292 regbyte_ctx *ctx = *rbc;
2293
2294 /* for every specifier in the rule */
2295 while (sp)
2296 {
Michal Krola79d4e72006-10-19 08:07:00 +00002297 int i, len, save_ind = ind;
Michal Krola904b492004-03-04 13:07:52 +00002298 barray *array = NULL;
2299
2300 if (satisfies_condition (sp->m_cond, ctx))
2301 {
2302 switch (sp->m_spec_type)
2303 {
2304 case st_identifier:
2305 barray_create (&array);
2306 if (array == NULL)
2307 {
2308 free_regbyte_ctx_stack (ctx, *rbc);
2309 return mr_internal_error;
2310 }
2311
2312 status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
Michal Krol904ef742004-10-20 14:54:17 +00002313
Michal Krola904b492004-03-04 13:07:52 +00002314 if (status == mr_internal_error)
2315 {
2316 free_regbyte_ctx_stack (ctx, *rbc);
2317 barray_destroy (&array);
2318 return mr_internal_error;
2319 }
2320 break;
2321 case st_string:
2322 len = str_length (sp->m_string);
2323
2324 /* prefilter the stream */
2325 if (!filtering_string && di->m_string)
2326 {
2327 barray *ba;
Michal Krola79d4e72006-10-19 08:07:00 +00002328 int filter_index = 0;
Michal Krola904b492004-03-04 13:07:52 +00002329 match_result result;
2330 regbyte_ctx *null_ctx = NULL;
2331
2332 barray_create (&ba);
2333 if (ba == NULL)
2334 {
2335 free_regbyte_ctx_stack (ctx, *rbc);
2336 return mr_internal_error;
2337 }
2338
2339 result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);
2340
2341 if (result == mr_internal_error)
2342 {
2343 free_regbyte_ctx_stack (ctx, *rbc);
2344 barray_destroy (&ba);
2345 return mr_internal_error;
2346 }
2347
2348 if (result != mr_matched)
2349 {
2350 barray_destroy (&ba);
2351 status = mr_not_matched;
2352 break;
2353 }
2354
2355 barray_destroy (&ba);
2356
2357 if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2358 {
2359 status = mr_not_matched;
2360 break;
2361 }
2362
2363 status = mr_matched;
2364 ind += len;
2365 }
2366 else
2367 {
2368 status = mr_matched;
2369 for (i = 0; status == mr_matched && i < len; i++)
2370 if (text[ind + i] != sp->m_string[i])
2371 status = mr_not_matched;
Michal Krol904ef742004-10-20 14:54:17 +00002372
Michal Krola904b492004-03-04 13:07:52 +00002373 if (status == mr_matched)
2374 ind += len;
2375 }
2376 break;
2377 case st_byte:
2378 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2379 if (status == mr_matched)
2380 ind++;
2381 break;
2382 case st_byte_range:
2383 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2384 mr_matched : mr_not_matched;
2385 if (status == mr_matched)
2386 ind++;
2387 break;
2388 case st_true:
2389 status = mr_matched;
2390 break;
2391 case st_false:
2392 status = mr_not_matched;
2393 break;
2394 case st_debug:
2395 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2396 break;
2397 case st_identifier_loop:
2398 barray_create (&array);
2399 if (array == NULL)
2400 {
2401 free_regbyte_ctx_stack (ctx, *rbc);
2402 return mr_internal_error;
2403 }
2404
2405 status = mr_dont_emit;
2406 for (;;)
2407 {
2408 match_result result;
2409
2410 save_ind = ind;
2411 result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);
2412
2413 if (result == mr_error_raised)
2414 {
2415 status = result;
2416 break;
2417 }
2418 else if (result == mr_matched)
2419 {
2420 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||
2421 barray_append (ba, &array))
2422 {
2423 free_regbyte_ctx_stack (ctx, *rbc);
2424 barray_destroy (&array);
2425 return mr_internal_error;
2426 }
2427 barray_destroy (&array);
2428 barray_create (&array);
2429 if (array == NULL)
2430 {
2431 free_regbyte_ctx_stack (ctx, *rbc);
2432 return mr_internal_error;
2433 }
2434 }
2435 else if (result == mr_internal_error)
2436 {
2437 free_regbyte_ctx_stack (ctx, *rbc);
2438 barray_destroy (&array);
2439 return mr_internal_error;
2440 }
2441 else
2442 break;
2443 }
2444 break;
2445 }
2446 }
2447 else
2448 {
2449 status = mr_not_matched;
2450 }
2451
2452 if (status == mr_error_raised)
2453 {
2454 free_regbyte_ctx_stack (ctx, *rbc);
2455 barray_destroy (&array);
2456
2457 return mr_error_raised;
2458 }
2459
2460 if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2461 {
2462 free_regbyte_ctx_stack (ctx, *rbc);
2463 barray_destroy (&array);
2464
2465 if (sp->m_errtext)
2466 {
2467 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2468 ind), ind);
2469
2470 return mr_error_raised;
2471 }
2472
2473 return mr_not_matched;
2474 }
2475
2476 if (status == mr_matched)
2477 {
2478 if (sp->m_emits)
2479 if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx))
2480 {
2481 free_regbyte_ctx_stack (ctx, *rbc);
2482 barray_destroy (&array);
2483 return mr_internal_error;
2484 }
2485
2486 if (array)
2487 if (barray_append (ba, &array))
2488 {
2489 free_regbyte_ctx_stack (ctx, *rbc);
2490 barray_destroy (&array);
2491 return mr_internal_error;
2492 }
2493 }
2494
2495 barray_destroy (&array);
2496
2497 /* if the rule operator is a logical or, we pick up the first matching specifier */
2498 if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2499 {
2500 *index = ind;
2501 *rbc = ctx;
2502 return mr_matched;
2503 }
2504
Michal Krol904ef742004-10-20 14:54:17 +00002505 sp = sp->next;
Michal Krola904b492004-03-04 13:07:52 +00002506 }
2507
2508 /* everything went fine - all specifiers match up */
2509 if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2510 {
2511 *index = ind;
2512 *rbc = ctx;
2513 return mr_matched;
2514 }
2515
2516 free_regbyte_ctx_stack (ctx, *rbc);
2517 return mr_not_matched;
2518}
2519
Michal Krola79d4e72006-10-19 08:07:00 +00002520static match_result
2521fast_match (dict *di, const byte *text, int *index, rule *ru, int *_PP, bytepool *_BP,
2522 int filtering_string, regbyte_ctx **rbc)
Michal Krol904ef742004-10-20 14:54:17 +00002523{
Michal Krola79d4e72006-10-19 08:07:00 +00002524 int ind = *index;
Michal Krol904ef742004-10-20 14:54:17 +00002525 int _P = filtering_string ? 0 : *_PP;
2526 int _P2;
2527 match_result status = mr_not_matched;
2528 spec *sp = ru->m_specs;
2529 regbyte_ctx *ctx = *rbc;
2530
2531 /* for every specifier in the rule */
2532 while (sp)
2533 {
Michal Krola79d4e72006-10-19 08:07:00 +00002534 int i, len, save_ind = ind;
Michal Krol904ef742004-10-20 14:54:17 +00002535
2536 _P2 = _P + (sp->m_emits ? emit_size (sp->m_emits) : 0);
2537 if (bytepool_reserve (_BP, _P2))
2538 {
2539 free_regbyte_ctx_stack (ctx, *rbc);
2540 return mr_internal_error;
2541 }
2542
2543 if (satisfies_condition (sp->m_cond, ctx))
2544 {
2545 switch (sp->m_spec_type)
2546 {
2547 case st_identifier:
2548 status = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2549
2550 if (status == mr_internal_error)
2551 {
2552 free_regbyte_ctx_stack (ctx, *rbc);
2553 return mr_internal_error;
2554 }
2555 break;
2556 case st_string:
2557 len = str_length (sp->m_string);
2558
2559 /* prefilter the stream */
2560 if (!filtering_string && di->m_string)
2561 {
Michal Krola79d4e72006-10-19 08:07:00 +00002562 int filter_index = 0;
Michal Krol904ef742004-10-20 14:54:17 +00002563 match_result result;
2564 regbyte_ctx *null_ctx = NULL;
2565
2566 result = fast_match (di, text + ind, &filter_index, di->m_string, NULL, _BP, 1, &null_ctx);
2567
2568 if (result == mr_internal_error)
2569 {
2570 free_regbyte_ctx_stack (ctx, *rbc);
2571 return mr_internal_error;
2572 }
2573
2574 if (result != mr_matched)
2575 {
2576 status = mr_not_matched;
2577 break;
2578 }
2579
2580 if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len))
2581 {
2582 status = mr_not_matched;
2583 break;
2584 }
2585
2586 status = mr_matched;
2587 ind += len;
2588 }
2589 else
2590 {
2591 status = mr_matched;
2592 for (i = 0; status == mr_matched && i < len; i++)
2593 if (text[ind + i] != sp->m_string[i])
2594 status = mr_not_matched;
2595
2596 if (status == mr_matched)
2597 ind += len;
2598 }
2599 break;
2600 case st_byte:
2601 status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;
2602 if (status == mr_matched)
2603 ind++;
2604 break;
2605 case st_byte_range:
2606 status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?
2607 mr_matched : mr_not_matched;
2608 if (status == mr_matched)
2609 ind++;
2610 break;
2611 case st_true:
2612 status = mr_matched;
2613 break;
2614 case st_false:
2615 status = mr_not_matched;
2616 break;
2617 case st_debug:
2618 status = ru->m_oper == op_and ? mr_matched : mr_not_matched;
2619 break;
2620 case st_identifier_loop:
2621 status = mr_dont_emit;
2622 for (;;)
2623 {
2624 match_result result;
2625
2626 save_ind = ind;
2627 result = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx);
2628
2629 if (result == mr_error_raised)
2630 {
2631 status = result;
2632 break;
2633 }
2634 else if (result == mr_matched)
2635 {
2636 if (!filtering_string)
2637 {
2638 if (sp->m_emits != NULL)
2639 {
2640 if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx))
2641 {
2642 free_regbyte_ctx_stack (ctx, *rbc);
2643 return mr_internal_error;
2644 }
2645 }
2646
2647 _P = _P2;
2648 _P2 += sp->m_emits ? emit_size (sp->m_emits) : 0;
2649 if (bytepool_reserve (_BP, _P2))
2650 {
2651 free_regbyte_ctx_stack (ctx, *rbc);
2652 return mr_internal_error;
2653 }
2654 }
2655 }
2656 else if (result == mr_internal_error)
2657 {
2658 free_regbyte_ctx_stack (ctx, *rbc);
2659 return mr_internal_error;
2660 }
2661 else
2662 break;
2663 }
2664 break;
2665 }
2666 }
2667 else
2668 {
2669 status = mr_not_matched;
2670 }
2671
2672 if (status == mr_error_raised)
2673 {
2674 free_regbyte_ctx_stack (ctx, *rbc);
2675
2676 return mr_error_raised;
2677 }
2678
2679 if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)
2680 {
2681 free_regbyte_ctx_stack (ctx, *rbc);
2682
2683 if (sp->m_errtext)
2684 {
2685 set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,
2686 ind), ind);
2687
2688 return mr_error_raised;
2689 }
2690
2691 return mr_not_matched;
2692 }
2693
2694 if (status == mr_matched)
2695 {
Brian020cdb42007-04-19 14:15:11 -06002696 if (sp->m_emits != NULL) {
2697 const byte ch = (ind <= 0) ? 0 : text[ind - 1];
2698 if (emit_push (sp->m_emits, _BP->_F + _P, ch, save_ind, &ctx))
Michal Krol904ef742004-10-20 14:54:17 +00002699 {
2700 free_regbyte_ctx_stack (ctx, *rbc);
2701 return mr_internal_error;
2702 }
2703
Brian020cdb42007-04-19 14:15:11 -06002704 }
2705 _P = _P2;
Michal Krol904ef742004-10-20 14:54:17 +00002706 }
2707
2708 /* if the rule operator is a logical or, we pick up the first matching specifier */
2709 if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))
2710 {
2711 *index = ind;
2712 *rbc = ctx;
2713 if (!filtering_string)
2714 *_PP = _P;
2715 return mr_matched;
2716 }
2717
2718 sp = sp->next;
2719 }
2720
2721 /* everything went fine - all specifiers match up */
2722 if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))
2723 {
2724 *index = ind;
2725 *rbc = ctx;
2726 if (!filtering_string)
2727 *_PP = _P;
2728 return mr_matched;
2729 }
2730
2731 free_regbyte_ctx_stack (ctx, *rbc);
2732 return mr_not_matched;
2733}
2734
Michal Krola79d4e72006-10-19 08:07:00 +00002735static byte *
2736error_get_token (error *er, dict *di, const byte *text, int ind)
Michal Krola904b492004-03-04 13:07:52 +00002737{
2738 byte *str = NULL;
2739
2740 if (er->m_token)
2741 {
2742 barray *ba;
Michal Krola79d4e72006-10-19 08:07:00 +00002743 int filter_index = 0;
Michal Krola904b492004-03-04 13:07:52 +00002744 regbyte_ctx *ctx = NULL;
2745
2746 barray_create (&ba);
2747 if (ba != NULL)
2748 {
2749 if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&
2750 filter_index)
2751 {
Brian Paulbdd15b52004-05-04 15:11:06 +00002752 str = (byte *) mem_alloc (filter_index + 1);
Michal Krola904b492004-03-04 13:07:52 +00002753 if (str != NULL)
2754 {
2755 str_copy_n (str, text + ind, filter_index);
2756 str[filter_index] = '\0';
2757 }
2758 }
2759 barray_destroy (&ba);
2760 }
2761 }
2762
2763 return str;
2764}
2765
2766typedef struct grammar_load_state_
2767{
2768 dict *di;
2769 byte *syntax_symbol;
2770 byte *string_symbol;
2771 map_str *maps;
2772 map_byte *mapb;
2773 map_rule *mapr;
2774} grammar_load_state;
2775
2776static void grammar_load_state_create (grammar_load_state **gr)
2777{
Brian Paulbdd15b52004-05-04 15:11:06 +00002778 *gr = (grammar_load_state *) mem_alloc (sizeof (grammar_load_state));
Michal Krola904b492004-03-04 13:07:52 +00002779 if (*gr)
2780 {
2781 (**gr).di = NULL;
2782 (**gr).syntax_symbol = NULL;
2783 (**gr).string_symbol = NULL;
2784 (**gr).maps = NULL;
2785 (**gr).mapb = NULL;
2786 (**gr).mapr = NULL;
2787 }
2788}
2789
2790static void grammar_load_state_destroy (grammar_load_state **gr)
2791{
2792 if (*gr)
2793 {
2794 dict_destroy (&(**gr).di);
2795 mem_free ((void **) &(**gr).syntax_symbol);
2796 mem_free ((void **) &(**gr).string_symbol);
2797 map_str_destroy (&(**gr).maps);
2798 map_byte_destroy (&(**gr).mapb);
2799 map_rule_destroy (&(**gr).mapr);
2800 mem_free ((void **) gr);
2801 }
2802}
2803
Briandceae282007-01-19 12:02:31 -07002804
/*
 * Prints a grammar_load_from_text() failure diagnostic to stderr.
 *
 * line - source line of the failing check (callers pass __LINE__).
 * msg  - optional extra detail; may be an empty string.
 */
static void error_msg(int line, const char *msg)
{
    FILE *out = stderr;

    fprintf(out, "Error in grammar_load_from_text() at line %d: %s\n", line, msg);
}
2809
2810
Michal Krola904b492004-03-04 13:07:52 +00002811/*
2812 the API
2813*/
Michal Krola904b492004-03-04 13:07:52 +00002814grammar grammar_load_from_text (const byte *text)
2815{
2816 grammar_load_state *g = NULL;
2817 grammar id = 0;
2818
2819 clear_last_error ();
2820
2821 grammar_load_state_create (&g);
Briandceae282007-01-19 12:02:31 -07002822 if (g == NULL) {
2823 error_msg(__LINE__, "");
Michal Krola904b492004-03-04 13:07:52 +00002824 return 0;
Briandceae282007-01-19 12:02:31 -07002825 }
Michal Krola904b492004-03-04 13:07:52 +00002826
2827 dict_create (&g->di);
2828 if (g->di == NULL)
2829 {
2830 grammar_load_state_destroy (&g);
Briandceae282007-01-19 12:02:31 -07002831 error_msg(__LINE__, "");
Michal Krola904b492004-03-04 13:07:52 +00002832 return 0;
2833 }
2834
2835 eat_spaces (&text);
2836
2837 /* skip ".syntax" keyword */
2838 text += 7;
2839 eat_spaces (&text);
2840
2841 /* retrieve root symbol */
2842 if (get_identifier (&text, &g->syntax_symbol))
2843 {
2844 grammar_load_state_destroy (&g);
Briandceae282007-01-19 12:02:31 -07002845 error_msg(__LINE__, "");
Michal Krola904b492004-03-04 13:07:52 +00002846 return 0;
2847 }
2848 eat_spaces (&text);
2849
2850 /* skip semicolon */
2851 text++;
2852 eat_spaces (&text);
2853
2854 while (*text)
2855 {
2856 byte *symbol = NULL;
2857 int is_dot = *text == '.';
2858
2859 if (is_dot)
2860 text++;
2861
2862 if (get_identifier (&text, &symbol))
2863 {
2864 grammar_load_state_destroy (&g);
Briandceae282007-01-19 12:02:31 -07002865 error_msg(__LINE__, "");
Michal Krola904b492004-03-04 13:07:52 +00002866 return 0;
2867 }
2868 eat_spaces (&text);
2869
2870 /* .emtcode */
2871 if (is_dot && str_equal (symbol, (byte *) "emtcode"))
2872 {
2873 map_byte *ma = NULL;
2874
Brian Paula6c423d2004-08-25 15:59:48 +00002875 mem_free ((void **) (void *) &symbol);
Michal Krola904b492004-03-04 13:07:52 +00002876
2877 if (get_emtcode (&text, &ma))
2878 {
2879 grammar_load_state_destroy (&g);
Briandceae282007-01-19 12:02:31 -07002880 error_msg(__LINE__, "");
Michal Krola904b492004-03-04 13:07:52 +00002881 return 0;
2882 }
2883
Michal Krol904ef742004-10-20 14:54:17 +00002884 map_byte_append (&g->mapb, ma);
Michal Krola904b492004-03-04 13:07:52 +00002885 }
2886 /* .regbyte */
2887 else if (is_dot && str_equal (symbol, (byte *) "regbyte"))
2888 {
2889 map_byte *ma = NULL;
2890
Brian Paula6c423d2004-08-25 15:59:48 +00002891 mem_free ((void **) (void *) &symbol);
Michal Krola904b492004-03-04 13:07:52 +00002892
2893 if (get_regbyte (&text, &ma))
2894 {
2895 grammar_load_state_destroy (&g);
Briandceae282007-01-19 12:02:31 -07002896 error_msg(__LINE__, "");
Michal Krola904b492004-03-04 13:07:52 +00002897 return 0;
2898 }
2899
Michal Krol904ef742004-10-20 14:54:17 +00002900 map_byte_append (&g->di->m_regbytes, ma);
Michal Krola904b492004-03-04 13:07:52 +00002901 }
2902 /* .errtext */
2903 else if (is_dot && str_equal (symbol, (byte *) "errtext"))
2904 {
2905 map_str *ma = NULL;
2906
Brian Paula6c423d2004-08-25 15:59:48 +00002907 mem_free ((void **) (void *) &symbol);
Michal Krola904b492004-03-04 13:07:52 +00002908
2909 if (get_errtext (&text, &ma))
2910 {
2911 grammar_load_state_destroy (&g);
Briandceae282007-01-19 12:02:31 -07002912 error_msg(__LINE__, "");
Michal Krola904b492004-03-04 13:07:52 +00002913 return 0;
2914 }
2915
Michal Krol904ef742004-10-20 14:54:17 +00002916 map_str_append (&g->maps, ma);
Michal Krola904b492004-03-04 13:07:52 +00002917 }
2918 /* .string */
2919 else if (is_dot && str_equal (symbol, (byte *) "string"))
2920 {
Brian Paula6c423d2004-08-25 15:59:48 +00002921 mem_free ((void **) (void *) &symbol);
Michal Krola904b492004-03-04 13:07:52 +00002922
2923 if (g->di->m_string != NULL)
2924 {
2925 grammar_load_state_destroy (&g);
Briandceae282007-01-19 12:02:31 -07002926 error_msg(__LINE__, "");
Michal Krola904b492004-03-04 13:07:52 +00002927 return 0;
2928 }
2929
2930 if (get_identifier (&text, &g->string_symbol))
2931 {
2932 grammar_load_state_destroy (&g);
Briandceae282007-01-19 12:02:31 -07002933 error_msg(__LINE__, "");
Michal Krola904b492004-03-04 13:07:52 +00002934 return 0;
2935 }
2936
2937 /* skip semicolon */
2938 eat_spaces (&text);
2939 text++;
2940 eat_spaces (&text);
2941 }
2942 else
2943 {
2944 rule *ru = NULL;
2945 map_rule *ma = NULL;
2946
2947 if (get_rule (&text, &ru, g->maps, g->mapb))
2948 {
2949 grammar_load_state_destroy (&g);
Briandceae282007-01-19 12:02:31 -07002950 error_msg(__LINE__, "");
Michal Krola904b492004-03-04 13:07:52 +00002951 return 0;
2952 }
2953
Michal Krol904ef742004-10-20 14:54:17 +00002954 rule_append (&g->di->m_rulez, ru);
Michal Krola904b492004-03-04 13:07:52 +00002955
2956 /* if a rule consist of only one specifier, give it an ".and" operator */
2957 if (ru->m_oper == op_none)
2958 ru->m_oper = op_and;
2959
2960 map_rule_create (&ma);
2961 if (ma == NULL)
2962 {
2963 grammar_load_state_destroy (&g);
Briandceae282007-01-19 12:02:31 -07002964 error_msg(__LINE__, "");
Michal Krola904b492004-03-04 13:07:52 +00002965 return 0;
2966 }
2967
2968 ma->key = symbol;
2969 ma->data = ru;
Michal Krol904ef742004-10-20 14:54:17 +00002970 map_rule_append (&g->mapr, ma);
Michal Krola904b492004-03-04 13:07:52 +00002971 }
2972 }
2973
2974 if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,
2975 g->di->m_regbytes))
2976 {
2977 grammar_load_state_destroy (&g);
Briandceae282007-01-19 12:02:31 -07002978 error_msg(__LINE__, "update_dependencies() failed");
Michal Krola904b492004-03-04 13:07:52 +00002979 return 0;
2980 }
2981
Michal Krol904ef742004-10-20 14:54:17 +00002982 dict_append (&g_dicts, g->di);
Michal Krola904b492004-03-04 13:07:52 +00002983 id = g->di->m_id;
2984 g->di = NULL;
2985
2986 grammar_load_state_destroy (&g);
2987
2988 return id;
2989}
2990
2991int grammar_set_reg8 (grammar id, const byte *name, byte value)
2992{
2993 dict *di = NULL;
2994 map_byte *reg = NULL;
2995
2996 clear_last_error ();
2997
2998 dict_find (&g_dicts, id, &di);
2999 if (di == NULL)
3000 {
3001 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3002 return 0;
3003 }
3004
3005 reg = map_byte_locate (&di->m_regbytes, name);
3006 if (reg == NULL)
3007 {
3008 set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);
3009 return 0;
3010 }
3011
3012 reg->data = value;
3013 return 1;
3014}
3015
Michal Krol904ef742004-10-20 14:54:17 +00003016/*
3017 internal checking function used by both grammar_check and grammar_fast_check functions
3018*/
3019static int _grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size,
3020 unsigned int estimate_prod_size, int use_fast_path)
Michal Krola904b492004-03-04 13:07:52 +00003021{
3022 dict *di = NULL;
Michal Krola79d4e72006-10-19 08:07:00 +00003023 int index = 0;
Michal Krola904b492004-03-04 13:07:52 +00003024
3025 clear_last_error ();
3026
3027 dict_find (&g_dicts, id, &di);
3028 if (di == NULL)
3029 {
3030 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3031 return 0;
3032 }
3033
Michal Krola904b492004-03-04 13:07:52 +00003034 *prod = NULL;
3035 *size = 0;
3036
Michal Krol904ef742004-10-20 14:54:17 +00003037 if (use_fast_path)
Michal Krola904b492004-03-04 13:07:52 +00003038 {
Michal Krol904ef742004-10-20 14:54:17 +00003039 regbyte_ctx *rbc = NULL;
3040 bytepool *bp = NULL;
3041 int _P = 0;
3042
3043 bytepool_create (&bp, estimate_prod_size);
3044 if (bp == NULL)
3045 return 0;
3046
3047 if (fast_match (di, text, &index, di->m_syntax, &_P, bp, 0, &rbc) != mr_matched)
3048 {
3049 bytepool_destroy (&bp);
3050 free_regbyte_ctx_stack (rbc, NULL);
3051 return 0;
3052 }
3053
Michal Krola904b492004-03-04 13:07:52 +00003054 free_regbyte_ctx_stack (rbc, NULL);
Michal Krol904ef742004-10-20 14:54:17 +00003055
3056 *prod = bp->_F;
3057 *size = _P;
3058 bp->_F = NULL;
3059 bytepool_destroy (&bp);
Michal Krola904b492004-03-04 13:07:52 +00003060 }
Michal Krol904ef742004-10-20 14:54:17 +00003061 else
Michal Krola904b492004-03-04 13:07:52 +00003062 {
Michal Krol904ef742004-10-20 14:54:17 +00003063 regbyte_ctx *rbc = NULL;
3064 barray *ba = NULL;
Michal Krola904b492004-03-04 13:07:52 +00003065
Michal Krol904ef742004-10-20 14:54:17 +00003066 barray_create (&ba);
3067 if (ba == NULL)
3068 return 0;
3069
3070 if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched)
3071 {
3072 barray_destroy (&ba);
3073 free_regbyte_ctx_stack (rbc, NULL);
3074 return 0;
3075 }
3076
3077 free_regbyte_ctx_stack (rbc, NULL);
3078
3079 *prod = (byte *) mem_alloc (ba->len * sizeof (byte));
3080 if (*prod == NULL)
3081 {
3082 barray_destroy (&ba);
3083 return 0;
3084 }
3085
3086 mem_copy (*prod, ba->data, ba->len * sizeof (byte));
3087 *size = ba->len;
3088 barray_destroy (&ba);
3089 }
Michal Krola904b492004-03-04 13:07:52 +00003090
3091 return 1;
3092}
3093
Michal Krol904ef742004-10-20 14:54:17 +00003094int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)
3095{
3096 return _grammar_check (id, text, prod, size, 0, 0);
3097}
3098
3099int grammar_fast_check (grammar id, const byte *text, byte **prod, unsigned int *size,
3100 unsigned int estimate_prod_size)
3101{
3102 return _grammar_check (id, text, prod, size, estimate_prod_size, 1);
3103}
3104
Michal Krola904b492004-03-04 13:07:52 +00003105int grammar_destroy (grammar id)
3106{
3107 dict **di = &g_dicts;
3108
3109 clear_last_error ();
3110
3111 while (*di != NULL)
3112 {
3113 if ((**di).m_id == id)
3114 {
3115 dict *tmp = *di;
Michal Krol904ef742004-10-20 14:54:17 +00003116 *di = (**di).next;
Michal Krola904b492004-03-04 13:07:52 +00003117 dict_destroy (&tmp);
3118 return 1;
3119 }
3120
Michal Krol904ef742004-10-20 14:54:17 +00003121 di = &(**di).next;
Michal Krola904b492004-03-04 13:07:52 +00003122 }
3123
3124 set_last_error (INVALID_GRAMMAR_ID, NULL, -1);
3125 return 0;
3126}
3127
Michal Krol904ef742004-10-20 14:54:17 +00003128static void append_character (const char x, byte *text, int *dots_made, int *len, int size)
3129{
3130 if (*dots_made == 0)
3131 {
3132 if (*len < size - 1)
3133 {
3134 text[(*len)++] = x;
3135 text[*len] = '\0';
3136 }
3137 else
3138 {
3139 int i;
3140 for (i = 0; i < 3; i++)
3141 if (--(*len) >= 0)
3142 text[*len] = '.';
3143 *dots_made = 1;
3144 }
3145 }
3146}
3147
Michal Krola904b492004-03-04 13:07:52 +00003148void grammar_get_last_error (byte *text, unsigned int size, int *pos)
3149{
Brian Paulbd997cd2004-07-20 21:12:56 +00003150 int len = 0, dots_made = 0;
Michal Krola904b492004-03-04 13:07:52 +00003151 const byte *p = error_message;
3152
3153 *text = '\0';
3154
Michal Krol904ef742004-10-20 14:54:17 +00003155 if (p)
3156 {
3157 while (*p)
3158 {
3159 if (*p == '$')
3160 {
Michal Krola904b492004-03-04 13:07:52 +00003161 const byte *r = error_param;
3162
Michal Krol904ef742004-10-20 14:54:17 +00003163 while (*r)
3164 {
3165 append_character (*r++, text, &dots_made, &len, (int) size);
Michal Krola904b492004-03-04 13:07:52 +00003166 }
Michal Krol904ef742004-10-20 14:54:17 +00003167
Michal Krola904b492004-03-04 13:07:52 +00003168 p++;
3169 }
Michal Krol904ef742004-10-20 14:54:17 +00003170 else
3171 {
3172 append_character (*p++, text, &dots_made, &len, size);
Michal Krola904b492004-03-04 13:07:52 +00003173 }
Michal Krol904ef742004-10-20 14:54:17 +00003174 }
Brian Paul289ffee2004-10-02 15:56:50 +00003175 }
Michal Krol904ef742004-10-20 14:54:17 +00003176
Michal Krola904b492004-03-04 13:07:52 +00003177 *pos = error_position;
Michal Krola904b492004-03-04 13:07:52 +00003178}