blob: 903a6ab1cfe2935fb2dd574dcc45757bb5ce634f [file] [log] [blame]
Guido van Rossumb674c3b1992-01-19 16:32:47 +00001/*
2
3regexpr.c
4
5Author: Tatu Ylonen <ylo@ngs.fi>
6
7Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
8
9Permission to use, copy, modify, distribute, and sell this software
10and its documentation for any purpose is hereby granted without fee,
11provided that the above copyright notice appear in all copies. This
12software is provided "as is" without express or implied warranty.
13
14Created: Thu Sep 26 17:14:05 1991 ylo
15Last modified: Mon Nov 4 17:06:48 1991 ylo
16
17This code draws many ideas from the regular expression packages by
18Henry Spencer of the University of Toronto and Richard Stallman of the
19Free Software Foundation.
20
21Emacs-specific code and syntax table code is almost directly borrowed
22from GNU regexp.
23
24$Header$
25
26*/
27
28#include <stdio.h>
29#include <assert.h>
30#include "regexpr.h"
31
32char *malloc();
33void free();
34char *realloc();
35
36#define MACRO_BEGIN do {
37#define MACRO_END } while (0)
38
39enum regexp_compiled_ops /* opcodes for compiled regexp */
40{
41 Cend, /* end of pattern reached */
42 Cbol, /* beginning of line */
43 Ceol, /* end of line */
44 Cset, /* character set. Followed by 32 bytes of set. */
45 Cexact, /* followed by a byte to match */
46 Canychar, /* matches any character except newline */
47 Cstart_memory, /* set register start addr (followed by reg number) */
48 Cend_memory, /* set register end addr (followed by reg number) */
49 Cmatch_memory, /* match a duplicate of reg contents (regnum follows)*/
50 Cjump, /* followed by two bytes (lsb,msb) of displacement. */
51 Cstar_jump, /* will change to jump/update_failure_jump at runtime */
52 Cfailure_jump, /* jump to addr on failure */
53 Cupdate_failure_jump, /* update topmost failure point and jump */
54 Cdummy_failure_jump, /* push a dummy failure point and jump */
55 Cbegbuf, /* match at beginning of buffer */
56 Cendbuf, /* match at end of buffer */
57 Cwordbeg, /* match at beginning of word */
58 Cwordend, /* match at end of word */
59 Cwordbound, /* match if at word boundary */
60 Cnotwordbound, /* match if not at word boundary */
61#ifdef emacs
62 Cemacs_at_dot, /* emacs only: matches at dot */
63#endif /* emacs */
64 Csyntaxspec, /* matches syntax code (1 byte follows) */
65 Cnotsyntaxspec /* matches if syntax code does not match (1 byte foll)*/
66};
67
68enum regexp_syntax_op /* syntax codes for plain and quoted characters */
69{
70 Rend, /* special code for end of regexp */
71 Rnormal, /* normal character */
72 Ranychar, /* any character except newline */
73 Rquote, /* the quote character */
74 Rbol, /* match beginning of line */
75 Reol, /* match end of line */
76 Roptional, /* match preceding expression optionally */
77 Rstar, /* match preceding expr zero or more times */
78 Rplus, /* match preceding expr one or more times */
79 Ror, /* match either of alternatives */
80 Ropenpar, /* opening parenthesis */
81 Rclosepar, /* closing parenthesis */
82 Rmemory, /* match memory register */
83 Rextended_memory, /* \vnn to match registers 10-99 */
84 Ropenset, /* open set. Internal syntax hard-coded below. */
85 /* the following are gnu extensions to "normal" regexp syntax */
86 Rbegbuf, /* beginning of buffer */
87 Rendbuf, /* end of buffer */
88 Rwordchar, /* word character */
89 Rnotwordchar, /* not word character */
90 Rwordbeg, /* beginning of word */
91 Rwordend, /* end of word */
92 Rwordbound, /* word bound */
93 Rnotwordbound, /* not word bound */
94#ifdef emacs
95 Remacs_at_dot, /* emacs: at dot */
96 Remacs_syntaxspec, /* syntaxspec */
97 Remacs_notsyntaxspec, /* notsyntaxspec */
98#endif /* emacs */
99 Rnum_ops
100};
101
102static int re_compile_initialized = 0;
103static int regexp_syntax = 0;
104static unsigned char regexp_plain_ops[256];
105static unsigned char regexp_quoted_ops[256];
106static unsigned char regexp_precedences[Rnum_ops];
107static int regexp_context_indep_ops;
108static int regexp_ansi_sequences;
109
110#define NUM_LEVELS 5 /* number of precedence levels in use */
111#define MAX_NESTING 100 /* max nesting level of operators */
112
113#ifdef emacs
114
115/* This code is for emacs compatibility only. */
116
117#include "config.h"
118#include "lisp.h"
119#include "buffer.h"
120#include "syntax.h"
121
122/* emacs defines NULL in some strange way? */
123#undef NULL
124#define NULL 0
125
126#else /* emacs */
127
128#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
129#define Sword 1
130
131#ifdef SYNTAX_TABLE
132char *re_syntax_table;
133#else
134static char re_syntax_table[256];
135#endif /* SYNTAX_TABLE */
136
137#endif /* emacs */
138
139static void re_compile_initialize()
140{
141 int a;
142
143#if !defined(emacs) && !defined(SYNTAX_TABLE)
144 static int syntax_table_inited = 0;
145
146 if (!syntax_table_inited)
147 {
148 syntax_table_inited = 1;
149 memset(re_syntax_table, 0, 256);
150 for (a = 'a'; a <= 'z'; a++)
151 re_syntax_table[a] = Sword;
152 for (a = 'A'; a <= 'Z'; a++)
153 re_syntax_table[a] = Sword;
154 for (a = '0'; a <= '9'; a++)
155 re_syntax_table[a] = Sword;
156 }
157#endif /* !emacs && !SYNTAX_TABLE */
158 re_compile_initialized = 1;
159 for (a = 0; a < 256; a++)
160 {
161 regexp_plain_ops[a] = Rnormal;
162 regexp_quoted_ops[a] = Rnormal;
163 }
164 for (a = '0'; a <= '9'; a++)
165 regexp_quoted_ops[a] = Rmemory;
166 regexp_plain_ops['\134'] = Rquote;
167 if (regexp_syntax & RE_NO_BK_PARENS)
168 {
169 regexp_plain_ops['('] = Ropenpar;
170 regexp_plain_ops[')'] = Rclosepar;
171 }
172 else
173 {
174 regexp_quoted_ops['('] = Ropenpar;
175 regexp_quoted_ops[')'] = Rclosepar;
176 }
177 if (regexp_syntax & RE_NO_BK_VBAR)
178 regexp_plain_ops['\174'] = Ror;
179 else
180 regexp_quoted_ops['\174'] = Ror;
181 regexp_plain_ops['*'] = Rstar;
182 if (regexp_syntax & RE_BK_PLUS_QM)
183 {
184 regexp_quoted_ops['+'] = Rplus;
185 regexp_quoted_ops['?'] = Roptional;
186 }
187 else
188 {
189 regexp_plain_ops['+'] = Rplus;
190 regexp_plain_ops['?'] = Roptional;
191 }
192 if (regexp_syntax & RE_NEWLINE_OR)
193 regexp_plain_ops['\n'] = Ror;
194 regexp_plain_ops['\133'] = Ropenset;
195 regexp_plain_ops['\136'] = Rbol;
196 regexp_plain_ops['$'] = Reol;
197 regexp_plain_ops['.'] = Ranychar;
198 if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS))
199 {
200#ifdef emacs
201 regexp_quoted_ops['='] = Remacs_at_dot;
202 regexp_quoted_ops['s'] = Remacs_syntaxspec;
203 regexp_quoted_ops['S'] = Remacs_notsyntaxspec;
204#endif /* emacs */
205 regexp_quoted_ops['w'] = Rwordchar;
206 regexp_quoted_ops['W'] = Rnotwordchar;
207 regexp_quoted_ops['<'] = Rwordbeg;
208 regexp_quoted_ops['>'] = Rwordend;
209 regexp_quoted_ops['b'] = Rwordbound;
210 regexp_quoted_ops['B'] = Rnotwordbound;
211 regexp_quoted_ops['`'] = Rbegbuf;
212 regexp_quoted_ops['\''] = Rendbuf;
213 }
214 if (regexp_syntax & RE_ANSI_HEX)
215 regexp_quoted_ops['v'] = Rextended_memory;
216 for (a = 0; a < Rnum_ops; a++)
217 regexp_precedences[a] = 4;
218 if (regexp_syntax & RE_TIGHT_VBAR)
219 {
220 regexp_precedences[Ror] = 3;
221 regexp_precedences[Rbol] = 2;
222 regexp_precedences[Reol] = 2;
223 }
224 else
225 {
226 regexp_precedences[Ror] = 2;
227 regexp_precedences[Rbol] = 3;
228 regexp_precedences[Reol] = 3;
229 }
230 regexp_precedences[Rclosepar] = 1;
231 regexp_precedences[Rend] = 0;
232 regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
233 regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
234}
235
236int re_set_syntax(syntax)
237int syntax;
238{
239 int ret;
240
241 ret = regexp_syntax;
242 regexp_syntax = syntax;
243 re_compile_initialize();
244 return ret;
245}
246
247static int hex_char_to_decimal(ch)
248int ch;
249{
250 if (ch >= '0' && ch <= '9')
251 return ch - '0';
252 if (ch >= 'a' && ch <= 'f')
253 return ch - 'a' + 10;
254 if (ch >= 'A' && ch <= 'F')
255 return ch - 'A' + 10;
256 return 16;
257}
258
259char *re_compile_pattern(regex, size, bufp)
260char *regex;
261int size;
262regexp_t bufp;
263{
264 int a, pos, op, current_level, level, opcode;
265 int pattern_offset, alloc;
266 int starts[NUM_LEVELS * MAX_NESTING], starts_base;
267 int future_jumps[MAX_NESTING], num_jumps;
268 unsigned char ch;
269 char *pattern, *translate;
270 int next_register, paren_depth, num_open_registers, open_registers[RE_NREGS];
271 int beginning_context;
272
273#define NEXTCHAR(var) \
274 MACRO_BEGIN \
275 if (pos >= size) \
276 goto ends_prematurely; \
277 (var) = regex[pos]; \
278 pos++; \
279 MACRO_END
280
281#define ALLOC(amount) \
282 MACRO_BEGIN \
283 if (pattern_offset+(amount) > alloc) \
284 { \
285 alloc += 256 + (amount); \
286 pattern = realloc(pattern, alloc); \
287 if (!pattern) \
288 goto out_of_memory; \
289 } \
290 MACRO_END
291
292#define STORE(ch) pattern[pattern_offset++] = (ch)
293
294#define CURRENT_LEVEL_START (starts[starts_base + current_level])
295
296#define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset
297
298#define PUSH_LEVEL_STARTS if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
299 starts_base += NUM_LEVELS; \
300 else \
301 goto too_complex
302
303#define POP_LEVEL_STARTS starts_base -= NUM_LEVELS
304
305#define PUT_ADDR(offset,addr) \
306 MACRO_BEGIN \
307 int disp = (addr) - (offset) - 2; \
308 pattern[(offset)] = disp & 0xff; \
309 pattern[(offset)+1] = (disp>>8) & 0xff; \
310 MACRO_END
311
312#define INSERT_JUMP(pos,type,addr) \
313 MACRO_BEGIN \
314 int a, p = (pos), t = (type), ad = (addr); \
315 for (a = pattern_offset - 1; a >= p; a--) \
316 pattern[a + 3] = pattern[a]; \
317 pattern[p] = t; \
318 PUT_ADDR(p+1,ad); \
319 pattern_offset += 3; \
320 MACRO_END
321
322#define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7))
323
324#define SET_FIELDS \
325 MACRO_BEGIN \
326 bufp->allocated = alloc; \
327 bufp->buffer = pattern; \
328 bufp->used = pattern_offset; \
329 MACRO_END
330
331#define GETHEX(var) \
332 MACRO_BEGIN \
333 char gethex_ch, gethex_value; \
334 NEXTCHAR(gethex_ch); \
335 gethex_value = hex_char_to_decimal(gethex_ch); \
336 if (gethex_value == 16) \
337 goto hex_error; \
338 NEXTCHAR(gethex_ch); \
339 gethex_ch = hex_char_to_decimal(gethex_ch); \
340 if (gethex_ch == 16) \
341 goto hex_error; \
342 (var) = gethex_value * 16 + gethex_ch; \
343 MACRO_END
344
345#define ANSI_TRANSLATE(ch) \
346 MACRO_BEGIN \
347 switch (ch) \
348 { \
349 case 'a': \
350 case 'A': \
351 ch = 7; /* audible bell */ \
352 break; \
353 case 'b': \
354 case 'B': \
355 ch = 8; /* backspace */ \
356 break; \
357 case 'f': \
358 case 'F': \
359 ch = 12; /* form feed */ \
360 break; \
361 case 'n': \
362 case 'N': \
363 ch = 10; /* line feed */ \
364 break; \
365 case 'r': \
366 case 'R': \
367 ch = 13; /* carriage return */ \
368 break; \
369 case 't': \
370 case 'T': \
371 ch = 9; /* tab */ \
372 break; \
373 case 'v': \
374 case 'V': \
375 ch = 11; /* vertical tab */ \
376 break; \
377 case 'x': /* hex code */ \
378 case 'X': \
379 GETHEX(ch); \
380 break; \
381 default: \
382 /* other characters passed through */ \
383 if (translate) \
384 ch = translate[(unsigned char)ch]; \
385 break; \
386 } \
387 MACRO_END
388
389 if (!re_compile_initialized)
390 re_compile_initialize();
391 bufp->used = 0;
392 bufp->fastmap_accurate = 0;
393 bufp->uses_registers = 0;
394 translate = bufp->translate;
395 pattern = bufp->buffer;
396 alloc = bufp->allocated;
397 if (alloc == 0 || pattern == NULL)
398 {
399 alloc = 256;
400 pattern = malloc(alloc);
401 if (!pattern)
402 goto out_of_memory;
403 }
404 pattern_offset = 0;
405 starts_base = 0;
406 num_jumps = 0;
407 current_level = 0;
408 SET_LEVEL_START;
409 num_open_registers = 0;
410 next_register = 1;
411 paren_depth = 0;
412 beginning_context = 1;
413 op = -1;
414 /* we use Rend dummy to ensure that pending jumps are updated (due to
415 low priority of Rend) before exiting the loop. */
416 pos = 0;
417 while (op != Rend)
418 {
419 if (pos >= size)
420 op = Rend;
421 else
422 {
423 NEXTCHAR(ch);
424 if (translate)
425 ch = translate[(unsigned char)ch];
426 op = regexp_plain_ops[(unsigned char)ch];
427 if (op == Rquote)
428 {
429 NEXTCHAR(ch);
430 op = regexp_quoted_ops[(unsigned char)ch];
431 if (op == Rnormal && regexp_ansi_sequences)
432 ANSI_TRANSLATE(ch);
433 }
434 }
435 level = regexp_precedences[op];
436 /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n",
437 ch, op, level, current_level, CURRENT_LEVEL_START); */
438 if (level > current_level)
439 {
440 for (current_level++; current_level < level; current_level++)
441 SET_LEVEL_START;
442 SET_LEVEL_START;
443 }
444 else
445 if (level < current_level)
446 {
447 current_level = level;
448 for (;num_jumps > 0 &&
449 future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
450 num_jumps--)
451 PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
452 }
453 switch (op)
454 {
455 case Rend:
456 break;
457 case Rnormal:
458 normal_char:
459 opcode = Cexact;
460 store_opcode_and_arg: /* opcode & ch must be set */
461 SET_LEVEL_START;
462 ALLOC(2);
463 STORE(opcode);
464 STORE(ch);
465 break;
466 case Ranychar:
467 opcode = Canychar;
468 store_opcode:
469 SET_LEVEL_START;
470 ALLOC(1);
471 STORE(opcode);
472 break;
473 case Rquote:
474 abort();
475 /*NOTREACHED*/
476 case Rbol:
477 if (!beginning_context)
478 if (regexp_context_indep_ops)
479 goto op_error;
480 else
481 goto normal_char;
482 opcode = Cbol;
483 goto store_opcode;
484 case Reol:
485 if (!((pos >= size) ||
486 ((regexp_syntax & RE_NO_BK_VBAR) ?
487 (regex[pos] == '\174') :
488 (pos+1 < size && regex[pos] == '\134' &&
489 regex[pos+1] == '\174')) ||
490 ((regexp_syntax & RE_NO_BK_PARENS)?
491 (regex[pos] == ')'):
492 (pos+1 < size && regex[pos] == '\134' &&
493 regex[pos+1] == ')'))))
494 if (regexp_context_indep_ops)
495 goto op_error;
496 else
497 goto normal_char;
498 opcode = Ceol;
499 goto store_opcode;
500 break;
501 case Roptional:
502 if (beginning_context)
503 if (regexp_context_indep_ops)
504 goto op_error;
505 else
506 goto normal_char;
507 if (CURRENT_LEVEL_START == pattern_offset)
508 break; /* ignore empty patterns for ? */
509 ALLOC(3);
510 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
511 pattern_offset + 3);
512 break;
513 case Rstar:
514 case Rplus:
515 if (beginning_context)
516 if (regexp_context_indep_ops)
517 goto op_error;
518 else
519 goto normal_char;
520 if (CURRENT_LEVEL_START == pattern_offset)
521 break; /* ignore empty patterns for + and * */
522 ALLOC(9);
523 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
524 pattern_offset + 6);
525 INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
526 if (op == Rplus) /* jump over initial failure_jump */
527 INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
528 CURRENT_LEVEL_START + 6);
529 break;
530 case Ror:
531 ALLOC(6);
532 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
533 pattern_offset + 6);
534 if (num_jumps >= MAX_NESTING)
535 goto too_complex;
536 STORE(Cjump);
537 future_jumps[num_jumps++] = pattern_offset;
538 STORE(0);
539 STORE(0);
540 SET_LEVEL_START;
541 break;
542 case Ropenpar:
543 SET_LEVEL_START;
544 if (next_register < RE_NREGS)
545 {
546 bufp->uses_registers = 1;
547 ALLOC(2);
548 STORE(Cstart_memory);
549 STORE(next_register);
550 open_registers[num_open_registers++] = next_register;
551 next_register++;
552 }
553 paren_depth++;
554 PUSH_LEVEL_STARTS;
555 current_level = 0;
556 SET_LEVEL_START;
557 break;
558 case Rclosepar:
559 if (paren_depth <= 0)
560 goto parenthesis_error;
561 POP_LEVEL_STARTS;
562 current_level = regexp_precedences[Ropenpar];
563 paren_depth--;
564 if (paren_depth < num_open_registers)
565 {
566 bufp->uses_registers = 1;
567 ALLOC(2);
568 STORE(Cend_memory);
569 num_open_registers--;
570 STORE(open_registers[num_open_registers]);
571 }
572 break;
573 case Rmemory:
574 if (ch == '0')
575 goto bad_match_register;
576 assert(ch >= '0' && ch <= '9');
577 bufp->uses_registers = 1;
578 opcode = Cmatch_memory;
579 ch -= '0';
580 goto store_opcode_and_arg;
581 case Rextended_memory:
582 NEXTCHAR(ch);
583 if (ch < '0' || ch > '9')
584 goto bad_match_register;
585 NEXTCHAR(a);
586 if (a < '0' || a > '9')
587 goto bad_match_register;
588 ch = 10 * (a - '0') + ch - '0';
589 if (ch <= 0 || ch >= RE_NREGS)
590 goto bad_match_register;
591 bufp->uses_registers = 1;
592 opcode = Cmatch_memory;
593 goto store_opcode_and_arg;
594 case Ropenset:
595 {
596 int complement,prev,offset,range,firstchar;
597
598 SET_LEVEL_START;
599 ALLOC(1+256/8);
600 STORE(Cset);
601 offset = pattern_offset;
602 for (a = 0; a < 256/8; a++)
603 STORE(0);
604 NEXTCHAR(ch);
605 if (translate)
606 ch = translate[(unsigned char)ch];
607 if (ch == '\136')
608 {
609 complement = 1;
610 NEXTCHAR(ch);
611 if (translate)
612 ch = translate[(unsigned char)ch];
613 }
614 else
615 complement = 0;
616 prev = -1;
617 range = 0;
618 firstchar = 1;
619 while (ch != '\135' || firstchar)
620 {
621 firstchar = 0;
622 if (regexp_ansi_sequences && ch == '\134')
623 {
624 NEXTCHAR(ch);
625 ANSI_TRANSLATE(ch);
626 }
627 if (range)
628 {
629 for (a = prev; a <= ch; a++)
630 SETBIT(pattern, offset, a);
631 prev = -1;
632 range = 0;
633 }
634 else
635 if (prev != -1 && ch == '-')
636 range = 1;
637 else
638 {
639 SETBIT(pattern, offset, ch);
640 prev = ch;
641 }
642 NEXTCHAR(ch);
643 if (translate)
644 ch = translate[(unsigned char)ch];
645 }
646 if (range)
647 SETBIT(pattern, offset, '-');
648 if (complement)
649 {
650 for (a = 0; a < 256/8; a++)
651 pattern[offset+a] ^= 0xff;
652 }
653 break;
654 }
655 case Rbegbuf:
656 opcode = Cbegbuf;
657 goto store_opcode;
658 case Rendbuf:
659 opcode = Cendbuf;
660 goto store_opcode;
661 case Rwordchar:
662 opcode = Csyntaxspec;
663 ch = Sword;
664 goto store_opcode_and_arg;
665 case Rnotwordchar:
666 opcode = Cnotsyntaxspec;
667 ch = Sword;
668 goto store_opcode_and_arg;
669 case Rwordbeg:
670 opcode = Cwordbeg;
671 goto store_opcode;
672 case Rwordend:
673 opcode = Cwordend;
674 goto store_opcode;
675 case Rwordbound:
676 opcode = Cwordbound;
677 goto store_opcode;
678 case Rnotwordbound:
679 opcode = Cnotwordbound;
680 goto store_opcode;
681#ifdef emacs
682 case Remacs_at_dot:
683 opcode = Cemacs_at_dot;
684 goto store_opcode;
685 case Remacs_syntaxspec:
686 NEXTCHAR(ch);
687 if (translate)
688 ch = translate[(unsigned char)ch];
689 opcode = Csyntaxspec;
690 ch = syntax_spec_code[(unsigned char)ch];
691 goto store_opcode_and_arg;
692 case Remacs_notsyntaxspec:
693 NEXTCHAR(ch);
694 if (translate)
695 ch = translate[(unsigned char)ch];
696 opcode = Cnotsyntaxspec;
697 ch = syntax_spec_code[(unsigned char)ch];
698 goto store_opcode_and_arg;
699#endif /* emacs */
700 default:
701 abort();
702 }
703 beginning_context = (op == Ropenpar || op == Ror);
704 }
705 if (starts_base != 0)
706 goto parenthesis_error;
707 assert(num_jumps == 0);
708 ALLOC(1);
709 STORE(Cend);
710 SET_FIELDS;
711 return NULL;
712
713 op_error:
714 SET_FIELDS;
715 return "Badly placed special character";
716
717 bad_match_register:
718 SET_FIELDS;
719 return "Bad match register number";
720
721 hex_error:
722 SET_FIELDS;
723 return "Bad hexadecimal number";
724
725 parenthesis_error:
726 SET_FIELDS;
727 return "Badly placed parenthesis";
728
729 out_of_memory:
730 SET_FIELDS;
731 return "Out of memory";
732
733 ends_prematurely:
734 SET_FIELDS;
735 return "Regular expression ends prematurely";
736
737 too_complex:
738 SET_FIELDS;
739 return "Regular expression too complex";
740}
741#undef CHARAT
742#undef NEXTCHAR
743#undef GETHEX
744#undef ALLOC
745#undef STORE
746#undef CURRENT_LEVEL_START
747#undef SET_LEVEL_START
748#undef PUSH_LEVEL_STARTS
749#undef POP_LEVEL_STARTS
750#undef PUT_ADDR
751#undef INSERT_JUMP
752#undef SETBIT
753#undef SET_FIELDS
754
755static void re_compile_fastmap_aux(code, pos, visited, can_be_null, fastmap)
756char *code, *visited, *can_be_null, *fastmap;
757int pos;
758{
759 int a, b, syntaxcode;
760
761 if (visited[pos])
762 return; /* we have already been here */
763 visited[pos] = 1;
764 for (;;)
765 switch (code[pos++])
766 {
767 case Cend:
768 *can_be_null = 1;
769 return;
770 case Cbol:
771 case Cbegbuf:
772 case Cendbuf:
773 case Cwordbeg:
774 case Cwordend:
775 case Cwordbound:
776 case Cnotwordbound:
777#ifdef emacs
778 case Cemacs_at_dot:
779#endif /* emacs */
780 break;
781 case Csyntaxspec:
782 syntaxcode = code[pos++];
783 for (a = 0; a < 256; a++)
784 if (SYNTAX(a) == syntaxcode)
785 fastmap[a] = 1;
786 return;
787 case Cnotsyntaxspec:
788 syntaxcode = code[pos++];
789 for (a = 0; a < 256; a++)
790 if (SYNTAX(a) != syntaxcode)
791 fastmap[a] = 1;
792 return;
793 case Ceol:
794 fastmap['\n'] = 1;
795 if (*can_be_null == 0)
796 *can_be_null = 2; /* can match null, but only at end of buffer*/
797 return;
798 case Cset:
799 for (a = 0; a < 256/8; a++)
800 if (code[pos + a] != 0)
801 for (b = 0; b < 8; b++)
802 if (code[pos + a] & (1 << b))
803 fastmap[(a << 3) + b] = 1;
804 pos += 256/8;
805 return;
806 case Cexact:
807 fastmap[(unsigned char)code[pos]] = 1;
808 return;
809 case Canychar:
810 for (a = 0; a < 256; a++)
811 if (a != '\n')
812 fastmap[a] = 1;
813 return;
814 case Cstart_memory:
815 case Cend_memory:
816 pos++;
817 break;
818 case Cmatch_memory:
819 /* should this ever happen for sensible patterns??? */
820 *can_be_null = 1;
821 return;
822 case Cjump:
823 case Cdummy_failure_jump:
824 case Cupdate_failure_jump:
825 case Cstar_jump:
826 a = (unsigned char)code[pos++];
827 a |= (unsigned char)code[pos++] << 8;
828 pos += (int)(short)a;
829 if (visited[pos])
830 {
831 /* argh... the regexp contains empty loops. This is not
832 good, as this may cause a failure stack overflow when
833 matching. Oh well. */
834 /* this path leads nowhere; pursue other paths. */
835 return;
836 }
837 visited[pos] = 1;
838 break;
839 case Cfailure_jump:
840 a = (unsigned char)code[pos++];
841 a |= (unsigned char)code[pos++] << 8;
842 a = pos + (int)(short)a;
843 re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
844 break;
845 default:
846 abort(); /* probably some opcode is missing from this switch */
847 /*NOTREACHED*/
848 }
849}
850
851static int re_do_compile_fastmap(buffer, used, pos, can_be_null, fastmap)
852char *buffer, *fastmap, *can_be_null;
853int used, pos;
854{
855 char small_visited[512], *visited;
856
857 if (used <= sizeof(small_visited))
858 visited = small_visited;
859 else
860 {
861 visited = malloc(used);
862 if (!visited)
863 return 0;
864 }
865 *can_be_null = 0;
866 memset(fastmap, 0, 256);
867 memset(visited, 0, used);
868 re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
869 if (visited != small_visited)
870 free(visited);
871 return 1;
872}
873
874void re_compile_fastmap(bufp)
875regexp_t bufp;
876{
877 if (!bufp->fastmap || bufp->fastmap_accurate)
878 return;
879 assert(bufp->used > 0);
880 if (!re_do_compile_fastmap(bufp->buffer, bufp->used, 0, &bufp->can_be_null,
881 bufp->fastmap))
882 return;
883 if (bufp->buffer[0] == Cbol)
884 bufp->anchor = 1; /* begline */
885 else
886 if (bufp->buffer[0] == Cbegbuf)
887 bufp->anchor = 2; /* begbuf */
888 else
889 bufp->anchor = 0; /* none */
890 bufp->fastmap_accurate = 1;
891}
892
893#define INITIAL_FAILURES 128 /* initial # failure points to allocate */
894#define MAX_FAILURES 4100 /* max # of failure points before failing */
895
896int re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop)
897regexp_t bufp;
898char *string1, *string2;
899int size1, size2, pos, mstop;
900regexp_registers_t regs;
901{
902 struct failure_point { char *text, *partend, *code; }
903 *failure_stack_start, *failure_sp, *failure_stack_end,
904 initial_failure_stack[INITIAL_FAILURES];
905 char *code, *translate, *text, *textend, *partend, *part_2_end;
906 char *regstart_text[RE_NREGS], *regstart_partend[RE_NREGS];
907 char *regend_text[RE_NREGS], *regend_partend[RE_NREGS];
908 int a, b, ch, reg, regch, match_end;
909 char *regtext, *regpartend, *regtextend;
910
911#define PREFETCH \
912 MACRO_BEGIN \
913 if (text == partend) \
914 { \
915 if (text == textend) \
916 goto fail; \
917 text = string2; \
918 partend = part_2_end; \
919 } \
920 MACRO_END
921
922#define NEXTCHAR(var) \
923 MACRO_BEGIN \
924 PREFETCH; \
925 (var) = (unsigned char)*text++; \
926 if (translate) \
927 (var) = (unsigned char)translate[(var)]; \
928 MACRO_END
929
930 assert(pos >= 0 && size1 >= 0 && size2 >= 0 && mstop >= 0);
931 assert(mstop <= size1 + size2);
932 assert(pos <= mstop);
933
934 if (pos <= size1)
935 {
936 text = string1 + pos;
937 if (mstop <= size1)
938 {
939 partend = string1 + mstop;
940 textend = partend;
941 }
942 else
943 {
944 partend = string1 + size1;
945 textend = string2 + mstop - size1;
946 }
947 part_2_end = string2 + mstop - size1;
948 }
949 else
950 {
951 text = string2 + pos - size1;
952 partend = string2 + mstop - size1;
953 textend = partend;
954 part_2_end = partend;
955 }
956
957 if (bufp->uses_registers && regs != NULL)
958 for (a = 0; a < RE_NREGS; a++)
959 regend_text[a] = NULL;
960
961 code = bufp->buffer;
962 translate = bufp->translate;
963 failure_stack_start = failure_sp = initial_failure_stack;
964 failure_stack_end = initial_failure_stack + INITIAL_FAILURES;
965
966#if 0
967 /* re_search_2 has already done this, and otherwise we get little benefit
968 from this. So I'll leave this out. */
969 if (bufp->fastmap_accurate && !bufp->can_be_null &&
970 text != textend &&
971 !bufp->fastmap[translate ?
972 (unsigned char)translate[(unsigned char)*text] :
973 (unsigned char)*text])
974 return -1; /* it can't possibly match */
975#endif
976
977 continue_matching:
978 for (;;)
979 {
980 switch (*code++)
981 {
982 case Cend:
983 if (partend != part_2_end)
984 match_end = text - string1;
985 else
986 match_end = text - string2 + size1;
987 if (regs)
988 {
989 regs->start[0] = pos;
990 regs->end[0] = match_end;
991 if (!bufp->uses_registers)
992 {
993 for (a = 1; a < RE_NREGS; a++)
994 {
995 regs->start[a] = -1;
996 regs->end[a] = -1;
997 }
998 }
999 else
1000 {
1001 for (a = 1; a < RE_NREGS; a++)
1002 {
1003 if (regend_text[a] == NULL)
1004 {
1005 regs->start[a] = -1;
1006 regs->end[a] = -1;
1007 continue;
1008 }
1009 if (regstart_partend[a] != part_2_end)
1010 regs->start[a] = regstart_text[a] - string1;
1011 else
1012 regs->start[a] = regstart_text[a] - string2 + size1;
1013 if (regend_partend[a] != part_2_end)
1014 regs->end[a] = regend_text[a] - string1;
1015 else
1016 regs->end[a] = regend_text[a] - string2 + size1;
1017 }
1018 }
1019 }
1020 if (failure_stack_start != initial_failure_stack)
1021 free((char *)failure_stack_start);
1022 return match_end - pos;
1023 case Cbol:
1024 if (text == string1 || text[-1] == '\n') /* text[-1] always valid */
1025 break;
1026 goto fail;
1027 case Ceol:
1028 if (text == string2 + size2 ||
1029 (text == string1 + size1 ?
1030 (size2 == 0 || *string2 == '\n') :
1031 *text == '\n'))
1032 break;
1033 goto fail;
1034 case Cset:
1035 NEXTCHAR(ch);
1036 if (code[ch/8] & (1<<(ch & 7)))
1037 {
1038 code += 256/8;
1039 break;
1040 }
1041 goto fail;
1042 case Cexact:
1043 NEXTCHAR(ch);
1044 if (ch != (unsigned char)*code++)
1045 goto fail;
1046 break;
1047 case Canychar:
1048 NEXTCHAR(ch);
1049 if (ch == '\n')
1050 goto fail;
1051 break;
1052 case Cstart_memory:
1053 reg = *code++;
1054 regstart_text[reg] = text;
1055 regstart_partend[reg] = partend;
1056 break;
1057 case Cend_memory:
1058 reg = *code++;
1059 regend_text[reg] = text;
1060 regend_partend[reg] = partend;
1061 break;
1062 case Cmatch_memory:
1063 reg = *code++;
1064 if (regend_text[reg] == NULL)
1065 goto fail; /* or should we just match nothing? */
1066 regtext = regstart_text[reg];
1067 regtextend = regend_text[reg];
1068 if (regstart_partend[reg] == regend_partend[reg])
1069 regpartend = regtextend;
1070 else
1071 regpartend = string1 + size1;
1072
1073 for (;regtext != regtextend;)
1074 {
1075 NEXTCHAR(ch);
1076 if (regtext == regpartend)
1077 regtext = string2;
1078 regch = (unsigned char)*regtext++;
1079 if (translate)
1080 regch = (unsigned char)translate[regch];
1081 if (regch != ch)
1082 goto fail;
1083 }
1084 break;
1085 case Cstar_jump:
1086 /* star is coded as:
1087 1: failure_jump 2
1088 ... code for operand of star
1089 star_jump 1
1090 2: ... code after star
1091 We change the star_jump to update_failure_jump if we can determine
1092 that it is safe to do so; otherwise we change it to an ordinary
1093 jump.
1094 plus is coded as
1095 jump 2
1096 1: failure_jump 3
1097 2: ... code for operand of plus
1098 star_jump 1
1099 3: ... code after plus
1100 For star_jump considerations this is processed identically
1101 to star. */
1102 a = (unsigned char)*code++;
1103 a |= (unsigned char)*code++ << 8;
1104 a = (int)(short)a;
1105 {
1106 char map[256], can_be_null;
1107 char *p1, *p2;
1108
1109 p1 = code + a + 3; /* skip the failure_jump */
1110 assert(p1[-3] == Cfailure_jump);
1111 p2 = code;
1112 /* p1 points inside loop, p2 points to after loop */
1113 if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
1114 p2 - bufp->buffer, &can_be_null, map))
1115 goto make_normal_jump;
1116 /* If we might introduce a new update point inside the loop,
1117 we can't optimize because then update_jump would update a
1118 wrong failure point. Thus we have to be quite careful here. */
1119 loop_p1:
1120 /* loop until we find something that consumes a character */
1121 switch (*p1++)
1122 {
1123 case Cbol:
1124 case Ceol:
1125 case Cbegbuf:
1126 case Cendbuf:
1127 case Cwordbeg:
1128 case Cwordend:
1129 case Cwordbound:
1130 case Cnotwordbound:
1131#ifdef emacs
1132 case Cemacs_at_dot:
1133#endif /* emacs */
1134 goto loop_p1;
1135 case Cstart_memory:
1136 case Cend_memory:
1137 p1++;
1138 goto loop_p1;
1139 case Cexact:
1140 ch = (unsigned char)*p1++;
1141 if (map[ch])
1142 goto make_normal_jump;
1143 break;
1144 case Canychar:
1145 for (b = 0; b < 256; b++)
1146 if (b != '\n' && map[b])
1147 goto make_normal_jump;
1148 break;
1149 case Cset:
1150 for (b = 0; b < 256; b++)
1151 if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
1152 goto make_normal_jump;
1153 p1 += 256/8;
1154 break;
1155 default:
1156 goto make_normal_jump;
1157 }
1158 /* now we know that we can't backtrack. */
1159 while (p1 != p2 - 3)
1160 {
1161 switch (*p1++)
1162 {
1163 case Cend:
1164 abort(); /* we certainly shouldn't get this inside loop */
1165 /*NOTREACHED*/
1166 case Cbol:
1167 case Ceol:
1168 case Canychar:
1169 case Cbegbuf:
1170 case Cendbuf:
1171 case Cwordbeg:
1172 case Cwordend:
1173 case Cwordbound:
1174 case Cnotwordbound:
1175#ifdef emacs
1176 case Cemacs_at_dot:
1177#endif /* emacs */
1178 break;
1179 case Cset:
1180 p1 += 256/8;
1181 break;
1182 case Cexact:
1183 case Cstart_memory:
1184 case Cend_memory:
1185 case Cmatch_memory:
1186 case Csyntaxspec:
1187 case Cnotsyntaxspec:
1188 p1++;
1189 break;
1190 case Cjump:
1191 case Cstar_jump:
1192 case Cfailure_jump:
1193 case Cupdate_failure_jump:
1194 case Cdummy_failure_jump:
1195 goto make_normal_jump;
1196 default:
1197 printf("regexpr.c: processing star_jump: unknown op %d\n", p1[-1]);
1198 break;
1199 }
1200 }
1201 goto make_update_jump;
1202 }
1203 make_normal_jump:
1204 /* printf("changing to normal jump\n"); */
1205 code -= 3;
1206 *code = Cjump;
1207 break;
1208 make_update_jump:
1209 /* printf("changing to update jump\n"); */
1210 code -= 2;
1211 a += 3; /* jump to after the Cfailure_jump */
1212 code[-1] = Cupdate_failure_jump;
1213 code[0] = a & 0xff;
1214 code[1] = a >> 8;
1215 /* fall to next case */
1216 case Cupdate_failure_jump:
1217 failure_sp[-1].text = text;
1218 failure_sp[-1].partend = partend;
1219 /* fall to next case */
1220 case Cjump:
1221 a = (unsigned char)*code++;
1222 a |= (unsigned char)*code++ << 8;
1223 code += (int)(short)a;
1224 break;
1225 case Cdummy_failure_jump:
1226 case Cfailure_jump:
1227 if (failure_sp == failure_stack_end)
1228 {
1229 if (failure_stack_start != initial_failure_stack)
1230 goto error;
1231 failure_stack_start = (struct failure_point *)
1232 malloc(MAX_FAILURES * sizeof(*failure_stack_start));
1233 failure_stack_end = failure_stack_start + MAX_FAILURES;
1234 memcpy((char *)failure_stack_start, (char *)initial_failure_stack,
1235 INITIAL_FAILURES * sizeof(*failure_stack_start));
1236 failure_sp = failure_stack_start + INITIAL_FAILURES;
1237 }
1238 a = (unsigned char)*code++;
1239 a |= (unsigned char)*code++ << 8;
1240 a = (int)(short)a;
1241 if (code[-3] == Cdummy_failure_jump)
1242 { /* this is only used in plus */
1243 assert(*code == Cfailure_jump);
1244 b = (unsigned char)code[1];
1245 b |= (unsigned char)code[2] << 8;
1246 failure_sp->code = code + (int)(short)b + 3;
1247 failure_sp->text = NULL;
1248 code += a;
1249 }
1250 else
1251 {
1252 failure_sp->code = code + a;
1253 failure_sp->text = text;
1254 failure_sp->partend = partend;
1255 }
1256 failure_sp++;
1257 break;
1258 case Cbegbuf:
1259 if (text == string1)
1260 break;
1261 goto fail;
1262 case Cendbuf:
1263 if (size2 == 0 ? text == string1 + size1 : text == string2 + size2)
1264 break;
1265 goto fail;
1266 case Cwordbeg:
1267 if (text == string2 + size2)
1268 goto fail;
1269 if (size2 == 0 && text == string1 + size1)
1270 goto fail;
1271 if (SYNTAX(text == string1 + size1 ? *string1 : *text) != Sword)
1272 goto fail;
1273 if (text == string1)
1274 break;
1275 if (SYNTAX(text[-1]) != Sword)
1276 break;
1277 goto fail;
1278 case Cwordend:
1279 if (text == string1)
1280 goto fail;
1281 if (SYNTAX(text[-1]) != Sword)
1282 goto fail;
1283 if (text == string2 + size2)
1284 break;
1285 if (size2 == 0 && text == string1 + size1)
1286 break;
1287 if (SYNTAX(*text) == Sword)
1288 goto fail;
1289 break;
1290 case Cwordbound:
1291 /* Note: as in gnu regexp, this also matches at the beginning
1292 and end of buffer. */
1293 if (text == string1 || text == string2 + size2 ||
1294 (size2 == 0 && text == string1 + size1))
1295 break;
1296 if ((SYNTAX(text[-1]) == Sword) ^
1297 (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword))
1298 break;
1299 goto fail;
1300 case Cnotwordbound:
1301 /* Note: as in gnu regexp, this never matches at the beginning
1302 and end of buffer. */
1303 if (text == string1 || text == string2 + size2 ||
1304 (size2 == 0 && text == string1 + size1))
1305 goto fail;
1306 if (!((SYNTAX(text[-1]) == Sword) ^
1307 (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword)))
1308 goto fail;
1309 break;
1310 case Csyntaxspec:
1311 NEXTCHAR(ch);
1312 if (SYNTAX(ch) != (unsigned char)*code++)
1313 goto fail;
1314 break;
1315 case Cnotsyntaxspec:
1316 NEXTCHAR(ch);
1317 if (SYNTAX(ch) != (unsigned char)*code++)
1318 break;
1319 goto fail;
1320#ifdef emacs
1321 case Cemacs_at_dot:
1322 if (PTR_CHAR_POS((unsigned char *)text) + 1 != point)
1323 goto fail;
1324 break;
1325#endif /* emacs */
1326 default:
1327 abort();
1328 /*NOTREACHED*/
1329 }
1330 }
1331 abort();
1332 /*NOTREACHED*/
1333
1334 fail:
1335 if (failure_sp != failure_stack_start)
1336 {
1337 failure_sp--;
1338 text = failure_sp->text;
1339 if (text == NULL)
1340 goto fail;
1341 partend = failure_sp->partend;
1342 code = failure_sp->code;
1343 goto continue_matching;
1344 }
1345 if (failure_stack_start != initial_failure_stack)
1346 free((char *)failure_stack_start);
1347 return -1;
1348
1349 error:
1350 if (failure_stack_start != initial_failure_stack)
1351 free((char *)failure_stack_start);
1352 return -2;
1353}
1354
1355#undef PREFETCH
1356#undef NEXTCHAR
1357#undef PUSH_FAILURE
1358
1359int re_match(bufp, string, size, pos, regs)
1360regexp_t bufp;
1361char *string;
1362int size, pos;
1363regexp_registers_t regs;
1364{
1365 return re_match_2(bufp, string, size, (char *)NULL, 0, pos, regs, size);
1366}
1367
1368int re_search_2(bufp, string1, size1, string2, size2, pos, range, regs,
1369 mstop)
1370regexp_t bufp;
1371char *string1, *string2;
1372int size1, size2, pos, range, mstop;
1373regexp_registers_t regs;
1374{
1375 char *fastmap, *translate, *text, *partstart, *partend;
1376 int dir, ret;
1377 char anchor;
1378
1379 assert(size1 >= 0 && size2 >= 0 && pos >= 0 && mstop >= 0);
1380 assert(pos + range + 1 >= 0 && pos + range - 1 <= size1 + size2);
1381 assert(pos <= mstop);
1382
1383 fastmap = bufp->fastmap;
1384 translate = bufp->translate;
1385 if (fastmap && !bufp->fastmap_accurate)
1386 re_compile_fastmap(bufp);
1387 anchor = bufp->anchor;
1388 if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
1389 fastmap = NULL;
1390 if (range < 0)
1391 {
1392 dir = -1;
1393 range = -range;
1394 }
1395 else
1396 dir = 1;
1397 if (anchor == 2)
1398 if (pos != 0)
1399 return -1;
1400 else
1401 range = 0;
1402 for (; range >= 0; range--, pos += dir)
1403 {
1404 if (fastmap)
1405 {
1406 if (dir == 1)
1407 { /* searching forwards */
1408 if (pos < size1)
1409 {
1410 text = string1 + pos;
1411 if (pos + range > size1)
1412 partend = string1 + size1;
1413 else
1414 partend = string1 + pos + range;
1415 }
1416 else
1417 {
1418 text = string2 + pos - size1;
1419 partend = string2 + pos + range - size1;
1420 }
1421 partstart = text;
1422 if (translate)
1423 while (text != partend &&
1424 !fastmap[(unsigned char)
1425 translate[(unsigned char)*text]])
1426 text++;
1427 else
1428 while (text != partend && !fastmap[(unsigned char)*text])
1429 text++;
1430 pos += text - partstart;
1431 range -= text - partstart;
1432 if (pos == size1 + size2 && bufp->can_be_null == 0)
1433 return -1;
1434 }
1435 else
1436 { /* searching backwards */
1437 if (pos <= size1)
1438 {
1439 text = string1 + pos;
1440 partstart = string1 + pos - range;
1441 }
1442 else
1443 {
1444 text = string2 + pos - size1;
1445 if (range < pos - size1)
1446 partstart = string2 + pos - size1 - range;
1447 else
1448 partstart = string2;
1449 }
1450 partend = text;
1451 if (translate)
1452 while (text != partstart &&
1453 !fastmap[(unsigned char)
1454 translate[(unsigned char)*text]])
1455 text--;
1456 else
1457 while (text != partstart &&
1458 !fastmap[(unsigned char)*text])
1459 text--;
1460 pos -= partend - text;
1461 range -= partend - text;
1462 }
1463 }
1464 if (anchor == 1)
1465 { /* anchored to begline */
1466 if (pos > 0 &&
1467 (pos <= size1 ? string1[pos - 1] :
1468 string2[pos - size1 - 1]) != '\n')
1469 continue;
1470 }
1471 assert(pos >= 0 && pos <= size1 + size2);
1472 ret = re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop);
1473 if (ret >= 0)
1474 return pos;
1475 if (ret == -2)
1476 return -2;
1477 }
1478 return -1;
1479}
1480
1481int re_search(bufp, string, size, startpos, range, regs)
1482regexp_t bufp;
1483char *string;
1484int size, startpos, range;
1485regexp_registers_t regs;
1486{
1487 return re_search_2(bufp, string, size, (char *)NULL, 0,
1488 startpos, range, regs, size);
1489}
1490
1491static struct re_pattern_buffer re_comp_buf;
1492
1493char *re_comp(s)
1494char *s;
1495{
1496 if (s == NULL)
1497 {
1498 if (!re_comp_buf.buffer)
1499 return "Out of memory";
1500 return NULL;
1501 }
1502 if (!re_comp_buf.buffer)
1503 {
1504 /* the buffer will be allocated automatically */
1505 re_comp_buf.fastmap = malloc(256);
1506 re_comp_buf.translate = NULL;
1507 }
1508 return re_compile_pattern(s, strlen(s), &re_comp_buf);
1509}
1510
1511int re_exec(s)
1512char *s;
1513{
1514 int len = strlen(s);
1515
1516 return re_search(&re_comp_buf, s, len, 0, len, (regexp_registers_t)NULL) >= 0;
1517}
1518
1519#ifdef TEST_REGEXP
1520
1521int main()
1522{
1523 char buf[500];
1524 char *cp;
1525 struct re_pattern_buffer exp;
1526 struct re_registers regs;
1527 int a,pos;
1528 char fastmap[256];
1529
1530 exp.allocated = 0;
1531 exp.buffer = 0;
1532 exp.translate = NULL;
1533 exp.fastmap = fastmap;
1534
1535 /* re_set_syntax(RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_ANSI_HEX); */
1536
1537 while (1)
1538 {
1539 printf("Enter regexp:\n");
1540 gets(buf);
1541 cp=re_compile_pattern(buf, strlen(buf), &exp);
1542 if (cp)
1543 {
1544 printf("Error: %s\n", cp);
1545 continue;
1546 }
1547 re_compile_fastmap(&exp);
1548 printf("dump:\n");
1549 for (pos = 0; pos < exp.used;)
1550 {
1551 printf("%d: ", pos);
1552 switch (exp.buffer[pos++])
1553 {
1554 case Cend:
1555 strcpy(buf, "end");
1556 break;
1557 case Cbol:
1558 strcpy(buf, "bol");
1559 break;
1560 case Ceol:
1561 strcpy(buf, "eol");
1562 break;
1563 case Cset:
1564 strcpy(buf, "set ");
1565 for (a = 0; a < 256/8; a++)
1566 sprintf(buf+strlen(buf)," %02x",
1567 (unsigned char)exp.buffer[pos++]);
1568 break;
1569 case Cexact:
1570 sprintf(buf, "exact '%c' 0x%x", exp.buffer[pos],
1571 (unsigned char)exp.buffer[pos]);
1572 pos++;
1573 break;
1574 case Canychar:
1575 strcpy(buf, "anychar");
1576 break;
1577 case Cstart_memory:
1578 sprintf(buf, "start_memory %d", exp.buffer[pos++]);
1579 break;
1580 case Cend_memory:
1581 sprintf(buf, "end_memory %d", exp.buffer[pos++]);
1582 break;
1583 case Cmatch_memory:
1584 sprintf(buf, "match_memory %d", exp.buffer[pos++]);
1585 break;
1586 case Cjump:
1587 case Cdummy_failure_jump:
1588 case Cstar_jump:
1589 case Cfailure_jump:
1590 case Cupdate_failure_jump:
1591 a = (unsigned char)exp.buffer[pos++];
1592 a += (unsigned char)exp.buffer[pos++] << 8;
1593 a = (int)(short)a;
1594 switch (exp.buffer[pos-3])
1595 {
1596 case Cjump:
1597 cp = "jump";
1598 break;
1599 case Cstar_jump:
1600 cp = "star_jump";
1601 break;
1602 case Cfailure_jump:
1603 cp = "failure_jump";
1604 break;
1605 case Cupdate_failure_jump:
1606 cp = "update_failure_jump";
1607 break;
1608 case Cdummy_failure_jump:
1609 cp = "dummy_failure_jump";
1610 break;
1611 default:
1612 cp = "unknown jump";
1613 break;
1614 }
1615 sprintf(buf, "%s %d", cp, a + pos);
1616 break;
1617 case Cbegbuf:
1618 strcpy(buf,"begbuf");
1619 break;
1620 case Cendbuf:
1621 strcpy(buf,"endbuf");
1622 break;
1623 case Cwordbeg:
1624 strcpy(buf,"wordbeg");
1625 break;
1626 case Cwordend:
1627 strcpy(buf,"wordend");
1628 break;
1629 case Cwordbound:
1630 strcpy(buf,"wordbound");
1631 break;
1632 case Cnotwordbound:
1633 strcpy(buf,"notwordbound");
1634 break;
1635 default:
1636 sprintf(buf, "unknown code %d",
1637 (unsigned char)exp.buffer[pos - 1]);
1638 break;
1639 }
1640 printf("%s\n", buf);
1641 }
1642 printf("can_be_null = %d uses_registers = %d anchor = %d\n",
1643 exp.can_be_null, exp.uses_registers, exp.anchor);
1644
1645 printf("fastmap:");
1646 for (a = 0; a < 256; a++)
1647 if (exp.fastmap[a])
1648 printf(" %d", a);
1649 printf("\n");
1650 printf("Enter strings. An empty line terminates.\n");
1651 while (fgets(buf, sizeof(buf), stdin))
1652 {
1653 if (buf[0] == '\n')
1654 break;
1655 a = re_search(&exp, buf, strlen(buf), 0, strlen(buf), &regs);
1656 printf("search returns %d\n", a);
1657 if (a != -1)
1658 {
1659 for (a = 0; a < RE_NREGS; a++)
1660 {
1661 printf("buf %d: %d to %d\n", a, regs.start[a], regs.end[a]);
1662 }
1663 }
1664 }
1665 }
1666}
1667
1668#endif /* TEST_REGEXP */