blob: c7693482cebf868e6a74fe658cfed2b28002d994 [file] [log] [blame]
Guido van Rossumb674c3b1992-01-19 16:32:47 +00001/*
2
3regexpr.c
4
5Author: Tatu Ylonen <ylo@ngs.fi>
6
7Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
8
9Permission to use, copy, modify, distribute, and sell this software
10and its documentation for any purpose is hereby granted without fee,
11provided that the above copyright notice appear in all copies. This
12software is provided "as is" without express or implied warranty.
13
14Created: Thu Sep 26 17:14:05 1991 ylo
15Last modified: Mon Nov 4 17:06:48 1991 ylo
16
17This code draws many ideas from the regular expression packages by
18Henry Spencer of the University of Toronto and Richard Stallman of the
19Free Software Foundation.
20
21Emacs-specific code and syntax table code is almost directly borrowed
22from GNU regexp.
23
24$Header$
25
26*/
27
28#include <stdio.h>
29#include <assert.h>
30#include "regexpr.h"
31
32char *malloc();
33void free();
34char *realloc();
35
36#define MACRO_BEGIN do {
37#define MACRO_END } while (0)
38
39enum regexp_compiled_ops /* opcodes for compiled regexp */
40{
41 Cend, /* end of pattern reached */
42 Cbol, /* beginning of line */
43 Ceol, /* end of line */
44 Cset, /* character set. Followed by 32 bytes of set. */
45 Cexact, /* followed by a byte to match */
46 Canychar, /* matches any character except newline */
47 Cstart_memory, /* set register start addr (followed by reg number) */
48 Cend_memory, /* set register end addr (followed by reg number) */
49 Cmatch_memory, /* match a duplicate of reg contents (regnum follows)*/
50 Cjump, /* followed by two bytes (lsb,msb) of displacement. */
51 Cstar_jump, /* will change to jump/update_failure_jump at runtime */
52 Cfailure_jump, /* jump to addr on failure */
53 Cupdate_failure_jump, /* update topmost failure point and jump */
54 Cdummy_failure_jump, /* push a dummy failure point and jump */
55 Cbegbuf, /* match at beginning of buffer */
56 Cendbuf, /* match at end of buffer */
57 Cwordbeg, /* match at beginning of word */
58 Cwordend, /* match at end of word */
59 Cwordbound, /* match if at word boundary */
60 Cnotwordbound, /* match if not at word boundary */
61#ifdef emacs
62 Cemacs_at_dot, /* emacs only: matches at dot */
63#endif /* emacs */
64 Csyntaxspec, /* matches syntax code (1 byte follows) */
65 Cnotsyntaxspec /* matches if syntax code does not match (1 byte foll)*/
66};
67
68enum regexp_syntax_op /* syntax codes for plain and quoted characters */
69{
70 Rend, /* special code for end of regexp */
71 Rnormal, /* normal character */
72 Ranychar, /* any character except newline */
73 Rquote, /* the quote character */
74 Rbol, /* match beginning of line */
75 Reol, /* match end of line */
76 Roptional, /* match preceding expression optionally */
77 Rstar, /* match preceding expr zero or more times */
78 Rplus, /* match preceding expr one or more times */
79 Ror, /* match either of alternatives */
80 Ropenpar, /* opening parenthesis */
81 Rclosepar, /* closing parenthesis */
82 Rmemory, /* match memory register */
83 Rextended_memory, /* \vnn to match registers 10-99 */
84 Ropenset, /* open set. Internal syntax hard-coded below. */
85 /* the following are gnu extensions to "normal" regexp syntax */
86 Rbegbuf, /* beginning of buffer */
87 Rendbuf, /* end of buffer */
88 Rwordchar, /* word character */
89 Rnotwordchar, /* not word character */
90 Rwordbeg, /* beginning of word */
91 Rwordend, /* end of word */
92 Rwordbound, /* word bound */
93 Rnotwordbound, /* not word bound */
94#ifdef emacs
95 Remacs_at_dot, /* emacs: at dot */
96 Remacs_syntaxspec, /* syntaxspec */
97 Remacs_notsyntaxspec, /* notsyntaxspec */
98#endif /* emacs */
99 Rnum_ops
100};
101
102static int re_compile_initialized = 0;
103static int regexp_syntax = 0;
104static unsigned char regexp_plain_ops[256];
105static unsigned char regexp_quoted_ops[256];
106static unsigned char regexp_precedences[Rnum_ops];
107static int regexp_context_indep_ops;
108static int regexp_ansi_sequences;
109
110#define NUM_LEVELS 5 /* number of precedence levels in use */
111#define MAX_NESTING 100 /* max nesting level of operators */
112
113#ifdef emacs
114
115/* This code is for emacs compatibility only. */
116
117#include "config.h"
118#include "lisp.h"
119#include "buffer.h"
120#include "syntax.h"
121
122/* emacs defines NULL in some strange way? */
123#undef NULL
124#define NULL 0
125
126#else /* emacs */
127
128#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
129#define Sword 1
130
131#ifdef SYNTAX_TABLE
132char *re_syntax_table;
133#else
134static char re_syntax_table[256];
135#endif /* SYNTAX_TABLE */
136
137#endif /* emacs */
138
139static void re_compile_initialize()
140{
141 int a;
142
143#if !defined(emacs) && !defined(SYNTAX_TABLE)
144 static int syntax_table_inited = 0;
145
146 if (!syntax_table_inited)
147 {
148 syntax_table_inited = 1;
149 memset(re_syntax_table, 0, 256);
150 for (a = 'a'; a <= 'z'; a++)
151 re_syntax_table[a] = Sword;
152 for (a = 'A'; a <= 'Z'; a++)
153 re_syntax_table[a] = Sword;
154 for (a = '0'; a <= '9'; a++)
155 re_syntax_table[a] = Sword;
156 }
157#endif /* !emacs && !SYNTAX_TABLE */
158 re_compile_initialized = 1;
159 for (a = 0; a < 256; a++)
160 {
161 regexp_plain_ops[a] = Rnormal;
162 regexp_quoted_ops[a] = Rnormal;
163 }
164 for (a = '0'; a <= '9'; a++)
165 regexp_quoted_ops[a] = Rmemory;
166 regexp_plain_ops['\134'] = Rquote;
167 if (regexp_syntax & RE_NO_BK_PARENS)
168 {
169 regexp_plain_ops['('] = Ropenpar;
170 regexp_plain_ops[')'] = Rclosepar;
171 }
172 else
173 {
174 regexp_quoted_ops['('] = Ropenpar;
175 regexp_quoted_ops[')'] = Rclosepar;
176 }
177 if (regexp_syntax & RE_NO_BK_VBAR)
178 regexp_plain_ops['\174'] = Ror;
179 else
180 regexp_quoted_ops['\174'] = Ror;
181 regexp_plain_ops['*'] = Rstar;
182 if (regexp_syntax & RE_BK_PLUS_QM)
183 {
184 regexp_quoted_ops['+'] = Rplus;
185 regexp_quoted_ops['?'] = Roptional;
186 }
187 else
188 {
189 regexp_plain_ops['+'] = Rplus;
190 regexp_plain_ops['?'] = Roptional;
191 }
192 if (regexp_syntax & RE_NEWLINE_OR)
193 regexp_plain_ops['\n'] = Ror;
194 regexp_plain_ops['\133'] = Ropenset;
195 regexp_plain_ops['\136'] = Rbol;
196 regexp_plain_ops['$'] = Reol;
197 regexp_plain_ops['.'] = Ranychar;
198 if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS))
199 {
200#ifdef emacs
201 regexp_quoted_ops['='] = Remacs_at_dot;
202 regexp_quoted_ops['s'] = Remacs_syntaxspec;
203 regexp_quoted_ops['S'] = Remacs_notsyntaxspec;
204#endif /* emacs */
205 regexp_quoted_ops['w'] = Rwordchar;
206 regexp_quoted_ops['W'] = Rnotwordchar;
207 regexp_quoted_ops['<'] = Rwordbeg;
208 regexp_quoted_ops['>'] = Rwordend;
209 regexp_quoted_ops['b'] = Rwordbound;
210 regexp_quoted_ops['B'] = Rnotwordbound;
211 regexp_quoted_ops['`'] = Rbegbuf;
212 regexp_quoted_ops['\''] = Rendbuf;
213 }
214 if (regexp_syntax & RE_ANSI_HEX)
215 regexp_quoted_ops['v'] = Rextended_memory;
216 for (a = 0; a < Rnum_ops; a++)
217 regexp_precedences[a] = 4;
218 if (regexp_syntax & RE_TIGHT_VBAR)
219 {
220 regexp_precedences[Ror] = 3;
221 regexp_precedences[Rbol] = 2;
222 regexp_precedences[Reol] = 2;
223 }
224 else
225 {
226 regexp_precedences[Ror] = 2;
227 regexp_precedences[Rbol] = 3;
228 regexp_precedences[Reol] = 3;
229 }
230 regexp_precedences[Rclosepar] = 1;
231 regexp_precedences[Rend] = 0;
232 regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
233 regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
234}
235
236int re_set_syntax(syntax)
237int syntax;
238{
239 int ret;
240
241 ret = regexp_syntax;
242 regexp_syntax = syntax;
243 re_compile_initialize();
244 return ret;
245}
246
247static int hex_char_to_decimal(ch)
248int ch;
249{
250 if (ch >= '0' && ch <= '9')
251 return ch - '0';
252 if (ch >= 'a' && ch <= 'f')
253 return ch - 'a' + 10;
254 if (ch >= 'A' && ch <= 'F')
255 return ch - 'A' + 10;
256 return 16;
257}
258
259char *re_compile_pattern(regex, size, bufp)
260char *regex;
261int size;
262regexp_t bufp;
263{
264 int a, pos, op, current_level, level, opcode;
265 int pattern_offset, alloc;
266 int starts[NUM_LEVELS * MAX_NESTING], starts_base;
267 int future_jumps[MAX_NESTING], num_jumps;
268 unsigned char ch;
269 char *pattern, *translate;
270 int next_register, paren_depth, num_open_registers, open_registers[RE_NREGS];
271 int beginning_context;
272
273#define NEXTCHAR(var) \
274 MACRO_BEGIN \
275 if (pos >= size) \
276 goto ends_prematurely; \
277 (var) = regex[pos]; \
278 pos++; \
279 MACRO_END
280
281#define ALLOC(amount) \
282 MACRO_BEGIN \
283 if (pattern_offset+(amount) > alloc) \
284 { \
285 alloc += 256 + (amount); \
286 pattern = realloc(pattern, alloc); \
287 if (!pattern) \
288 goto out_of_memory; \
289 } \
290 MACRO_END
291
292#define STORE(ch) pattern[pattern_offset++] = (ch)
293
294#define CURRENT_LEVEL_START (starts[starts_base + current_level])
295
296#define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset
297
298#define PUSH_LEVEL_STARTS if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
299 starts_base += NUM_LEVELS; \
300 else \
301 goto too_complex
302
303#define POP_LEVEL_STARTS starts_base -= NUM_LEVELS
304
305#define PUT_ADDR(offset,addr) \
306 MACRO_BEGIN \
307 int disp = (addr) - (offset) - 2; \
308 pattern[(offset)] = disp & 0xff; \
309 pattern[(offset)+1] = (disp>>8) & 0xff; \
310 MACRO_END
311
312#define INSERT_JUMP(pos,type,addr) \
313 MACRO_BEGIN \
314 int a, p = (pos), t = (type), ad = (addr); \
315 for (a = pattern_offset - 1; a >= p; a--) \
316 pattern[a + 3] = pattern[a]; \
317 pattern[p] = t; \
318 PUT_ADDR(p+1,ad); \
319 pattern_offset += 3; \
320 MACRO_END
321
322#define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7))
323
324#define SET_FIELDS \
325 MACRO_BEGIN \
326 bufp->allocated = alloc; \
327 bufp->buffer = pattern; \
328 bufp->used = pattern_offset; \
329 MACRO_END
330
331#define GETHEX(var) \
332 MACRO_BEGIN \
333 char gethex_ch, gethex_value; \
334 NEXTCHAR(gethex_ch); \
335 gethex_value = hex_char_to_decimal(gethex_ch); \
336 if (gethex_value == 16) \
337 goto hex_error; \
338 NEXTCHAR(gethex_ch); \
339 gethex_ch = hex_char_to_decimal(gethex_ch); \
340 if (gethex_ch == 16) \
341 goto hex_error; \
342 (var) = gethex_value * 16 + gethex_ch; \
343 MACRO_END
344
345#define ANSI_TRANSLATE(ch) \
346 MACRO_BEGIN \
347 switch (ch) \
348 { \
349 case 'a': \
350 case 'A': \
351 ch = 7; /* audible bell */ \
352 break; \
353 case 'b': \
354 case 'B': \
355 ch = 8; /* backspace */ \
356 break; \
357 case 'f': \
358 case 'F': \
359 ch = 12; /* form feed */ \
360 break; \
361 case 'n': \
362 case 'N': \
363 ch = 10; /* line feed */ \
364 break; \
365 case 'r': \
366 case 'R': \
367 ch = 13; /* carriage return */ \
368 break; \
369 case 't': \
370 case 'T': \
371 ch = 9; /* tab */ \
372 break; \
373 case 'v': \
374 case 'V': \
375 ch = 11; /* vertical tab */ \
376 break; \
377 case 'x': /* hex code */ \
378 case 'X': \
379 GETHEX(ch); \
380 break; \
381 default: \
382 /* other characters passed through */ \
383 if (translate) \
384 ch = translate[(unsigned char)ch]; \
385 break; \
386 } \
387 MACRO_END
388
389 if (!re_compile_initialized)
390 re_compile_initialize();
391 bufp->used = 0;
392 bufp->fastmap_accurate = 0;
393 bufp->uses_registers = 0;
394 translate = bufp->translate;
395 pattern = bufp->buffer;
396 alloc = bufp->allocated;
397 if (alloc == 0 || pattern == NULL)
398 {
399 alloc = 256;
400 pattern = malloc(alloc);
401 if (!pattern)
402 goto out_of_memory;
403 }
404 pattern_offset = 0;
405 starts_base = 0;
406 num_jumps = 0;
407 current_level = 0;
408 SET_LEVEL_START;
409 num_open_registers = 0;
410 next_register = 1;
411 paren_depth = 0;
412 beginning_context = 1;
413 op = -1;
414 /* we use Rend dummy to ensure that pending jumps are updated (due to
415 low priority of Rend) before exiting the loop. */
416 pos = 0;
417 while (op != Rend)
418 {
419 if (pos >= size)
420 op = Rend;
421 else
422 {
423 NEXTCHAR(ch);
424 if (translate)
425 ch = translate[(unsigned char)ch];
426 op = regexp_plain_ops[(unsigned char)ch];
427 if (op == Rquote)
428 {
429 NEXTCHAR(ch);
430 op = regexp_quoted_ops[(unsigned char)ch];
431 if (op == Rnormal && regexp_ansi_sequences)
432 ANSI_TRANSLATE(ch);
433 }
434 }
435 level = regexp_precedences[op];
436 /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n",
437 ch, op, level, current_level, CURRENT_LEVEL_START); */
438 if (level > current_level)
439 {
440 for (current_level++; current_level < level; current_level++)
441 SET_LEVEL_START;
442 SET_LEVEL_START;
443 }
444 else
445 if (level < current_level)
446 {
447 current_level = level;
448 for (;num_jumps > 0 &&
449 future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
450 num_jumps--)
451 PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
452 }
453 switch (op)
454 {
455 case Rend:
456 break;
457 case Rnormal:
458 normal_char:
459 opcode = Cexact;
460 store_opcode_and_arg: /* opcode & ch must be set */
461 SET_LEVEL_START;
462 ALLOC(2);
463 STORE(opcode);
464 STORE(ch);
465 break;
466 case Ranychar:
467 opcode = Canychar;
468 store_opcode:
469 SET_LEVEL_START;
470 ALLOC(1);
471 STORE(opcode);
472 break;
473 case Rquote:
474 abort();
475 /*NOTREACHED*/
476 case Rbol:
477 if (!beginning_context)
478 if (regexp_context_indep_ops)
479 goto op_error;
480 else
481 goto normal_char;
482 opcode = Cbol;
483 goto store_opcode;
484 case Reol:
485 if (!((pos >= size) ||
486 ((regexp_syntax & RE_NO_BK_VBAR) ?
487 (regex[pos] == '\174') :
488 (pos+1 < size && regex[pos] == '\134' &&
489 regex[pos+1] == '\174')) ||
490 ((regexp_syntax & RE_NO_BK_PARENS)?
491 (regex[pos] == ')'):
492 (pos+1 < size && regex[pos] == '\134' &&
493 regex[pos+1] == ')'))))
494 if (regexp_context_indep_ops)
495 goto op_error;
496 else
497 goto normal_char;
498 opcode = Ceol;
499 goto store_opcode;
500 break;
501 case Roptional:
502 if (beginning_context)
503 if (regexp_context_indep_ops)
504 goto op_error;
505 else
506 goto normal_char;
507 if (CURRENT_LEVEL_START == pattern_offset)
508 break; /* ignore empty patterns for ? */
509 ALLOC(3);
510 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
511 pattern_offset + 3);
512 break;
513 case Rstar:
514 case Rplus:
515 if (beginning_context)
516 if (regexp_context_indep_ops)
517 goto op_error;
518 else
519 goto normal_char;
520 if (CURRENT_LEVEL_START == pattern_offset)
521 break; /* ignore empty patterns for + and * */
522 ALLOC(9);
523 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
524 pattern_offset + 6);
525 INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
526 if (op == Rplus) /* jump over initial failure_jump */
527 INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
528 CURRENT_LEVEL_START + 6);
529 break;
530 case Ror:
531 ALLOC(6);
532 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
533 pattern_offset + 6);
534 if (num_jumps >= MAX_NESTING)
535 goto too_complex;
536 STORE(Cjump);
537 future_jumps[num_jumps++] = pattern_offset;
538 STORE(0);
539 STORE(0);
540 SET_LEVEL_START;
541 break;
542 case Ropenpar:
543 SET_LEVEL_START;
544 if (next_register < RE_NREGS)
545 {
546 bufp->uses_registers = 1;
547 ALLOC(2);
548 STORE(Cstart_memory);
549 STORE(next_register);
550 open_registers[num_open_registers++] = next_register;
551 next_register++;
552 }
553 paren_depth++;
554 PUSH_LEVEL_STARTS;
555 current_level = 0;
556 SET_LEVEL_START;
557 break;
558 case Rclosepar:
559 if (paren_depth <= 0)
560 goto parenthesis_error;
561 POP_LEVEL_STARTS;
562 current_level = regexp_precedences[Ropenpar];
563 paren_depth--;
564 if (paren_depth < num_open_registers)
565 {
566 bufp->uses_registers = 1;
567 ALLOC(2);
568 STORE(Cend_memory);
569 num_open_registers--;
570 STORE(open_registers[num_open_registers]);
571 }
572 break;
573 case Rmemory:
574 if (ch == '0')
575 goto bad_match_register;
576 assert(ch >= '0' && ch <= '9');
577 bufp->uses_registers = 1;
578 opcode = Cmatch_memory;
579 ch -= '0';
580 goto store_opcode_and_arg;
581 case Rextended_memory:
582 NEXTCHAR(ch);
583 if (ch < '0' || ch > '9')
584 goto bad_match_register;
585 NEXTCHAR(a);
586 if (a < '0' || a > '9')
587 goto bad_match_register;
588 ch = 10 * (a - '0') + ch - '0';
589 if (ch <= 0 || ch >= RE_NREGS)
590 goto bad_match_register;
591 bufp->uses_registers = 1;
592 opcode = Cmatch_memory;
593 goto store_opcode_and_arg;
594 case Ropenset:
595 {
596 int complement,prev,offset,range,firstchar;
597
598 SET_LEVEL_START;
599 ALLOC(1+256/8);
600 STORE(Cset);
601 offset = pattern_offset;
602 for (a = 0; a < 256/8; a++)
603 STORE(0);
604 NEXTCHAR(ch);
605 if (translate)
606 ch = translate[(unsigned char)ch];
607 if (ch == '\136')
608 {
609 complement = 1;
610 NEXTCHAR(ch);
611 if (translate)
612 ch = translate[(unsigned char)ch];
613 }
614 else
615 complement = 0;
616 prev = -1;
617 range = 0;
618 firstchar = 1;
619 while (ch != '\135' || firstchar)
620 {
621 firstchar = 0;
622 if (regexp_ansi_sequences && ch == '\134')
623 {
624 NEXTCHAR(ch);
625 ANSI_TRANSLATE(ch);
626 }
627 if (range)
628 {
629 for (a = prev; a <= ch; a++)
630 SETBIT(pattern, offset, a);
631 prev = -1;
632 range = 0;
633 }
634 else
635 if (prev != -1 && ch == '-')
636 range = 1;
637 else
638 {
639 SETBIT(pattern, offset, ch);
640 prev = ch;
641 }
642 NEXTCHAR(ch);
643 if (translate)
644 ch = translate[(unsigned char)ch];
645 }
646 if (range)
647 SETBIT(pattern, offset, '-');
648 if (complement)
649 {
650 for (a = 0; a < 256/8; a++)
651 pattern[offset+a] ^= 0xff;
652 }
653 break;
654 }
655 case Rbegbuf:
656 opcode = Cbegbuf;
657 goto store_opcode;
658 case Rendbuf:
659 opcode = Cendbuf;
660 goto store_opcode;
661 case Rwordchar:
662 opcode = Csyntaxspec;
663 ch = Sword;
664 goto store_opcode_and_arg;
665 case Rnotwordchar:
666 opcode = Cnotsyntaxspec;
667 ch = Sword;
668 goto store_opcode_and_arg;
669 case Rwordbeg:
670 opcode = Cwordbeg;
671 goto store_opcode;
672 case Rwordend:
673 opcode = Cwordend;
674 goto store_opcode;
675 case Rwordbound:
676 opcode = Cwordbound;
677 goto store_opcode;
678 case Rnotwordbound:
679 opcode = Cnotwordbound;
680 goto store_opcode;
681#ifdef emacs
682 case Remacs_at_dot:
683 opcode = Cemacs_at_dot;
684 goto store_opcode;
685 case Remacs_syntaxspec:
686 NEXTCHAR(ch);
687 if (translate)
688 ch = translate[(unsigned char)ch];
689 opcode = Csyntaxspec;
690 ch = syntax_spec_code[(unsigned char)ch];
691 goto store_opcode_and_arg;
692 case Remacs_notsyntaxspec:
693 NEXTCHAR(ch);
694 if (translate)
695 ch = translate[(unsigned char)ch];
696 opcode = Cnotsyntaxspec;
697 ch = syntax_spec_code[(unsigned char)ch];
698 goto store_opcode_and_arg;
699#endif /* emacs */
700 default:
701 abort();
702 }
703 beginning_context = (op == Ropenpar || op == Ror);
704 }
705 if (starts_base != 0)
706 goto parenthesis_error;
707 assert(num_jumps == 0);
708 ALLOC(1);
709 STORE(Cend);
710 SET_FIELDS;
711 return NULL;
712
713 op_error:
714 SET_FIELDS;
715 return "Badly placed special character";
716
717 bad_match_register:
718 SET_FIELDS;
719 return "Bad match register number";
720
721 hex_error:
722 SET_FIELDS;
723 return "Bad hexadecimal number";
724
725 parenthesis_error:
726 SET_FIELDS;
727 return "Badly placed parenthesis";
728
729 out_of_memory:
730 SET_FIELDS;
731 return "Out of memory";
732
733 ends_prematurely:
734 SET_FIELDS;
735 return "Regular expression ends prematurely";
736
737 too_complex:
738 SET_FIELDS;
739 return "Regular expression too complex";
740}
741#undef CHARAT
742#undef NEXTCHAR
743#undef GETHEX
744#undef ALLOC
745#undef STORE
746#undef CURRENT_LEVEL_START
747#undef SET_LEVEL_START
748#undef PUSH_LEVEL_STARTS
749#undef POP_LEVEL_STARTS
750#undef PUT_ADDR
751#undef INSERT_JUMP
752#undef SETBIT
753#undef SET_FIELDS
754
755static void re_compile_fastmap_aux(code, pos, visited, can_be_null, fastmap)
756char *code, *visited, *can_be_null, *fastmap;
757int pos;
758{
759 int a, b, syntaxcode;
760
761 if (visited[pos])
762 return; /* we have already been here */
763 visited[pos] = 1;
764 for (;;)
765 switch (code[pos++])
766 {
767 case Cend:
768 *can_be_null = 1;
769 return;
770 case Cbol:
771 case Cbegbuf:
772 case Cendbuf:
773 case Cwordbeg:
774 case Cwordend:
775 case Cwordbound:
776 case Cnotwordbound:
777#ifdef emacs
778 case Cemacs_at_dot:
779#endif /* emacs */
780 break;
781 case Csyntaxspec:
782 syntaxcode = code[pos++];
783 for (a = 0; a < 256; a++)
784 if (SYNTAX(a) == syntaxcode)
785 fastmap[a] = 1;
786 return;
787 case Cnotsyntaxspec:
788 syntaxcode = code[pos++];
789 for (a = 0; a < 256; a++)
790 if (SYNTAX(a) != syntaxcode)
791 fastmap[a] = 1;
792 return;
793 case Ceol:
794 fastmap['\n'] = 1;
795 if (*can_be_null == 0)
796 *can_be_null = 2; /* can match null, but only at end of buffer*/
797 return;
798 case Cset:
799 for (a = 0; a < 256/8; a++)
800 if (code[pos + a] != 0)
801 for (b = 0; b < 8; b++)
802 if (code[pos + a] & (1 << b))
803 fastmap[(a << 3) + b] = 1;
804 pos += 256/8;
805 return;
806 case Cexact:
807 fastmap[(unsigned char)code[pos]] = 1;
808 return;
809 case Canychar:
810 for (a = 0; a < 256; a++)
811 if (a != '\n')
812 fastmap[a] = 1;
813 return;
814 case Cstart_memory:
815 case Cend_memory:
816 pos++;
817 break;
818 case Cmatch_memory:
819 /* should this ever happen for sensible patterns??? */
820 *can_be_null = 1;
821 return;
822 case Cjump:
823 case Cdummy_failure_jump:
824 case Cupdate_failure_jump:
825 case Cstar_jump:
826 a = (unsigned char)code[pos++];
827 a |= (unsigned char)code[pos++] << 8;
828 pos += (int)(short)a;
829 if (visited[pos])
830 {
831 /* argh... the regexp contains empty loops. This is not
832 good, as this may cause a failure stack overflow when
833 matching. Oh well. */
834 /* this path leads nowhere; pursue other paths. */
835 return;
836 }
837 visited[pos] = 1;
838 break;
839 case Cfailure_jump:
840 a = (unsigned char)code[pos++];
841 a |= (unsigned char)code[pos++] << 8;
842 a = pos + (int)(short)a;
843 re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
844 break;
845 default:
846 abort(); /* probably some opcode is missing from this switch */
847 /*NOTREACHED*/
848 }
849}
850
851static int re_do_compile_fastmap(buffer, used, pos, can_be_null, fastmap)
852char *buffer, *fastmap, *can_be_null;
853int used, pos;
854{
855 char small_visited[512], *visited;
856
857 if (used <= sizeof(small_visited))
858 visited = small_visited;
859 else
860 {
861 visited = malloc(used);
862 if (!visited)
863 return 0;
864 }
865 *can_be_null = 0;
866 memset(fastmap, 0, 256);
867 memset(visited, 0, used);
868 re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
869 if (visited != small_visited)
870 free(visited);
871 return 1;
872}
873
874void re_compile_fastmap(bufp)
875regexp_t bufp;
876{
877 if (!bufp->fastmap || bufp->fastmap_accurate)
878 return;
879 assert(bufp->used > 0);
880 if (!re_do_compile_fastmap(bufp->buffer, bufp->used, 0, &bufp->can_be_null,
881 bufp->fastmap))
882 return;
883 if (bufp->buffer[0] == Cbol)
884 bufp->anchor = 1; /* begline */
885 else
886 if (bufp->buffer[0] == Cbegbuf)
887 bufp->anchor = 2; /* begbuf */
888 else
889 bufp->anchor = 0; /* none */
890 bufp->fastmap_accurate = 1;
891}
892
893#define INITIAL_FAILURES 128 /* initial # failure points to allocate */
894#define MAX_FAILURES 4100 /* max # of failure points before failing */
895
896int re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop)
897regexp_t bufp;
898char *string1, *string2;
899int size1, size2, pos, mstop;
900regexp_registers_t regs;
901{
902 struct failure_point { char *text, *partend, *code; }
903 *failure_stack_start, *failure_sp, *failure_stack_end,
904 initial_failure_stack[INITIAL_FAILURES];
905 char *code, *translate, *text, *textend, *partend, *part_2_end;
906 char *regstart_text[RE_NREGS], *regstart_partend[RE_NREGS];
907 char *regend_text[RE_NREGS], *regend_partend[RE_NREGS];
908 int a, b, ch, reg, regch, match_end;
909 char *regtext, *regpartend, *regtextend;
910
911#define PREFETCH \
912 MACRO_BEGIN \
913 if (text == partend) \
914 { \
915 if (text == textend) \
916 goto fail; \
917 text = string2; \
918 partend = part_2_end; \
919 } \
920 MACRO_END
921
922#define NEXTCHAR(var) \
923 MACRO_BEGIN \
924 PREFETCH; \
925 (var) = (unsigned char)*text++; \
926 if (translate) \
927 (var) = (unsigned char)translate[(var)]; \
928 MACRO_END
929
930 assert(pos >= 0 && size1 >= 0 && size2 >= 0 && mstop >= 0);
931 assert(mstop <= size1 + size2);
932 assert(pos <= mstop);
933
934 if (pos <= size1)
935 {
936 text = string1 + pos;
937 if (mstop <= size1)
938 {
939 partend = string1 + mstop;
940 textend = partend;
941 }
942 else
943 {
944 partend = string1 + size1;
945 textend = string2 + mstop - size1;
946 }
947 part_2_end = string2 + mstop - size1;
948 }
949 else
950 {
951 text = string2 + pos - size1;
952 partend = string2 + mstop - size1;
953 textend = partend;
954 part_2_end = partend;
955 }
956
957 if (bufp->uses_registers && regs != NULL)
958 for (a = 0; a < RE_NREGS; a++)
959 regend_text[a] = NULL;
960
961 code = bufp->buffer;
962 translate = bufp->translate;
963 failure_stack_start = failure_sp = initial_failure_stack;
964 failure_stack_end = initial_failure_stack + INITIAL_FAILURES;
965
966#if 0
967 /* re_search_2 has already done this, and otherwise we get little benefit
968 from this. So I'll leave this out. */
969 if (bufp->fastmap_accurate && !bufp->can_be_null &&
970 text != textend &&
971 !bufp->fastmap[translate ?
972 (unsigned char)translate[(unsigned char)*text] :
973 (unsigned char)*text])
974 return -1; /* it can't possibly match */
975#endif
976
977 continue_matching:
978 for (;;)
979 {
980 switch (*code++)
981 {
982 case Cend:
983 if (partend != part_2_end)
984 match_end = text - string1;
985 else
986 match_end = text - string2 + size1;
987 if (regs)
988 {
989 regs->start[0] = pos;
990 regs->end[0] = match_end;
991 if (!bufp->uses_registers)
992 {
993 for (a = 1; a < RE_NREGS; a++)
994 {
995 regs->start[a] = -1;
996 regs->end[a] = -1;
997 }
998 }
999 else
1000 {
1001 for (a = 1; a < RE_NREGS; a++)
1002 {
1003 if (regend_text[a] == NULL)
1004 {
1005 regs->start[a] = -1;
1006 regs->end[a] = -1;
1007 continue;
1008 }
1009 if (regstart_partend[a] != part_2_end)
1010 regs->start[a] = regstart_text[a] - string1;
1011 else
1012 regs->start[a] = regstart_text[a] - string2 + size1;
1013 if (regend_partend[a] != part_2_end)
1014 regs->end[a] = regend_text[a] - string1;
1015 else
1016 regs->end[a] = regend_text[a] - string2 + size1;
1017 }
1018 }
1019 }
1020 if (failure_stack_start != initial_failure_stack)
1021 free((char *)failure_stack_start);
1022 return match_end - pos;
1023 case Cbol:
1024 if (text == string1 || text[-1] == '\n') /* text[-1] always valid */
1025 break;
1026 goto fail;
1027 case Ceol:
1028 if (text == string2 + size2 ||
1029 (text == string1 + size1 ?
1030 (size2 == 0 || *string2 == '\n') :
1031 *text == '\n'))
1032 break;
1033 goto fail;
1034 case Cset:
1035 NEXTCHAR(ch);
1036 if (code[ch/8] & (1<<(ch & 7)))
1037 {
1038 code += 256/8;
1039 break;
1040 }
1041 goto fail;
1042 case Cexact:
1043 NEXTCHAR(ch);
1044 if (ch != (unsigned char)*code++)
1045 goto fail;
1046 break;
1047 case Canychar:
1048 NEXTCHAR(ch);
1049 if (ch == '\n')
1050 goto fail;
1051 break;
1052 case Cstart_memory:
1053 reg = *code++;
1054 regstart_text[reg] = text;
1055 regstart_partend[reg] = partend;
1056 break;
1057 case Cend_memory:
1058 reg = *code++;
1059 regend_text[reg] = text;
1060 regend_partend[reg] = partend;
1061 break;
1062 case Cmatch_memory:
1063 reg = *code++;
1064 if (regend_text[reg] == NULL)
1065 goto fail; /* or should we just match nothing? */
1066 regtext = regstart_text[reg];
1067 regtextend = regend_text[reg];
1068 if (regstart_partend[reg] == regend_partend[reg])
1069 regpartend = regtextend;
1070 else
1071 regpartend = string1 + size1;
1072
1073 for (;regtext != regtextend;)
1074 {
1075 NEXTCHAR(ch);
1076 if (regtext == regpartend)
1077 regtext = string2;
1078 regch = (unsigned char)*regtext++;
1079 if (translate)
1080 regch = (unsigned char)translate[regch];
1081 if (regch != ch)
1082 goto fail;
1083 }
1084 break;
1085 case Cstar_jump:
1086 /* star is coded as:
1087 1: failure_jump 2
1088 ... code for operand of star
1089 star_jump 1
1090 2: ... code after star
1091 We change the star_jump to update_failure_jump if we can determine
1092 that it is safe to do so; otherwise we change it to an ordinary
1093 jump.
1094 plus is coded as
1095 jump 2
1096 1: failure_jump 3
1097 2: ... code for operand of plus
1098 star_jump 1
1099 3: ... code after plus
1100 For star_jump considerations this is processed identically
1101 to star. */
1102 a = (unsigned char)*code++;
1103 a |= (unsigned char)*code++ << 8;
1104 a = (int)(short)a;
1105 {
1106 char map[256], can_be_null;
1107 char *p1, *p2;
1108
1109 p1 = code + a + 3; /* skip the failure_jump */
1110 assert(p1[-3] == Cfailure_jump);
1111 p2 = code;
1112 /* p1 points inside loop, p2 points to after loop */
1113 if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
1114 p2 - bufp->buffer, &can_be_null, map))
1115 goto make_normal_jump;
1116 /* If we might introduce a new update point inside the loop,
1117 we can't optimize because then update_jump would update a
1118 wrong failure point. Thus we have to be quite careful here. */
1119 loop_p1:
1120 /* loop until we find something that consumes a character */
1121 switch (*p1++)
1122 {
1123 case Cbol:
1124 case Ceol:
1125 case Cbegbuf:
1126 case Cendbuf:
1127 case Cwordbeg:
1128 case Cwordend:
1129 case Cwordbound:
1130 case Cnotwordbound:
1131#ifdef emacs
1132 case Cemacs_at_dot:
1133#endif /* emacs */
1134 goto loop_p1;
1135 case Cstart_memory:
1136 case Cend_memory:
1137 p1++;
1138 goto loop_p1;
1139 case Cexact:
1140 ch = (unsigned char)*p1++;
1141 if (map[ch])
1142 goto make_normal_jump;
1143 break;
1144 case Canychar:
1145 for (b = 0; b < 256; b++)
1146 if (b != '\n' && map[b])
1147 goto make_normal_jump;
1148 break;
1149 case Cset:
1150 for (b = 0; b < 256; b++)
1151 if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
1152 goto make_normal_jump;
1153 p1 += 256/8;
1154 break;
1155 default:
1156 goto make_normal_jump;
1157 }
1158 /* now we know that we can't backtrack. */
1159 while (p1 != p2 - 3)
1160 {
1161 switch (*p1++)
1162 {
1163 case Cend:
1164 abort(); /* we certainly shouldn't get this inside loop */
1165 /*NOTREACHED*/
1166 case Cbol:
1167 case Ceol:
1168 case Canychar:
1169 case Cbegbuf:
1170 case Cendbuf:
1171 case Cwordbeg:
1172 case Cwordend:
1173 case Cwordbound:
1174 case Cnotwordbound:
1175#ifdef emacs
1176 case Cemacs_at_dot:
1177#endif /* emacs */
1178 break;
1179 case Cset:
1180 p1 += 256/8;
1181 break;
1182 case Cexact:
1183 case Cstart_memory:
1184 case Cend_memory:
1185 case Cmatch_memory:
1186 case Csyntaxspec:
1187 case Cnotsyntaxspec:
1188 p1++;
1189 break;
1190 case Cjump:
1191 case Cstar_jump:
1192 case Cfailure_jump:
1193 case Cupdate_failure_jump:
1194 case Cdummy_failure_jump:
1195 goto make_normal_jump;
1196 default:
1197 printf("regexpr.c: processing star_jump: unknown op %d\n", p1[-1]);
1198 break;
1199 }
1200 }
1201 goto make_update_jump;
1202 }
1203 make_normal_jump:
1204 /* printf("changing to normal jump\n"); */
1205 code -= 3;
1206 *code = Cjump;
1207 break;
1208 make_update_jump:
1209 /* printf("changing to update jump\n"); */
1210 code -= 2;
1211 a += 3; /* jump to after the Cfailure_jump */
1212 code[-1] = Cupdate_failure_jump;
1213 code[0] = a & 0xff;
1214 code[1] = a >> 8;
1215 /* fall to next case */
1216 case Cupdate_failure_jump:
1217 failure_sp[-1].text = text;
1218 failure_sp[-1].partend = partend;
1219 /* fall to next case */
1220 case Cjump:
1221 a = (unsigned char)*code++;
1222 a |= (unsigned char)*code++ << 8;
1223 code += (int)(short)a;
1224 break;
1225 case Cdummy_failure_jump:
1226 case Cfailure_jump:
1227 if (failure_sp == failure_stack_end)
1228 {
1229 if (failure_stack_start != initial_failure_stack)
1230 goto error;
1231 failure_stack_start = (struct failure_point *)
1232 malloc(MAX_FAILURES * sizeof(*failure_stack_start));
1233 failure_stack_end = failure_stack_start + MAX_FAILURES;
1234 memcpy((char *)failure_stack_start, (char *)initial_failure_stack,
1235 INITIAL_FAILURES * sizeof(*failure_stack_start));
1236 failure_sp = failure_stack_start + INITIAL_FAILURES;
1237 }
1238 a = (unsigned char)*code++;
1239 a |= (unsigned char)*code++ << 8;
1240 a = (int)(short)a;
1241 if (code[-3] == Cdummy_failure_jump)
1242 { /* this is only used in plus */
1243 assert(*code == Cfailure_jump);
1244 b = (unsigned char)code[1];
1245 b |= (unsigned char)code[2] << 8;
1246 failure_sp->code = code + (int)(short)b + 3;
1247 failure_sp->text = NULL;
1248 code += a;
1249 }
1250 else
1251 {
1252 failure_sp->code = code + a;
1253 failure_sp->text = text;
1254 failure_sp->partend = partend;
1255 }
1256 failure_sp++;
1257 break;
1258 case Cbegbuf:
1259 if (text == string1)
1260 break;
1261 goto fail;
1262 case Cendbuf:
1263 if (size2 == 0 ? text == string1 + size1 : text == string2 + size2)
1264 break;
1265 goto fail;
1266 case Cwordbeg:
1267 if (text == string2 + size2)
1268 goto fail;
1269 if (size2 == 0 && text == string1 + size1)
1270 goto fail;
1271 if (SYNTAX(text == string1 + size1 ? *string1 : *text) != Sword)
1272 goto fail;
1273 if (text == string1)
1274 break;
1275 if (SYNTAX(text[-1]) != Sword)
1276 break;
1277 goto fail;
1278 case Cwordend:
1279 if (text == string1)
1280 goto fail;
1281 if (SYNTAX(text[-1]) != Sword)
1282 goto fail;
1283 if (text == string2 + size2)
1284 break;
1285 if (size2 == 0 && text == string1 + size1)
1286 break;
1287 if (SYNTAX(*text) == Sword)
1288 goto fail;
1289 break;
1290 case Cwordbound:
1291 /* Note: as in gnu regexp, this also matches at the beginning
1292 and end of buffer. */
1293 if (text == string1 || text == string2 + size2 ||
1294 (size2 == 0 && text == string1 + size1))
1295 break;
1296 if ((SYNTAX(text[-1]) == Sword) ^
1297 (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword))
1298 break;
1299 goto fail;
1300 case Cnotwordbound:
1301 /* Note: as in gnu regexp, this never matches at the beginning
1302 and end of buffer. */
1303 if (text == string1 || text == string2 + size2 ||
1304 (size2 == 0 && text == string1 + size1))
1305 goto fail;
1306 if (!((SYNTAX(text[-1]) == Sword) ^
1307 (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword)))
1308 goto fail;
1309 break;
1310 case Csyntaxspec:
1311 NEXTCHAR(ch);
1312 if (SYNTAX(ch) != (unsigned char)*code++)
1313 goto fail;
1314 break;
1315 case Cnotsyntaxspec:
1316 NEXTCHAR(ch);
1317 if (SYNTAX(ch) != (unsigned char)*code++)
1318 break;
1319 goto fail;
1320#ifdef emacs
1321 case Cemacs_at_dot:
1322 if (PTR_CHAR_POS((unsigned char *)text) + 1 != point)
1323 goto fail;
1324 break;
1325#endif /* emacs */
1326 default:
1327 abort();
1328 /*NOTREACHED*/
1329 }
1330 }
Guido van Rossum5f21dd11992-01-19 16:49:14 +00001331#if 0 /* This line is never reached */
Guido van Rossumb674c3b1992-01-19 16:32:47 +00001332 abort();
Guido van Rossum5f21dd11992-01-19 16:49:14 +00001333#endif
Guido van Rossumb674c3b1992-01-19 16:32:47 +00001334 /*NOTREACHED*/
1335
1336 fail:
1337 if (failure_sp != failure_stack_start)
1338 {
1339 failure_sp--;
1340 text = failure_sp->text;
1341 if (text == NULL)
1342 goto fail;
1343 partend = failure_sp->partend;
1344 code = failure_sp->code;
1345 goto continue_matching;
1346 }
1347 if (failure_stack_start != initial_failure_stack)
1348 free((char *)failure_stack_start);
1349 return -1;
1350
1351 error:
1352 if (failure_stack_start != initial_failure_stack)
1353 free((char *)failure_stack_start);
1354 return -2;
1355}
1356
1357#undef PREFETCH
1358#undef NEXTCHAR
1359#undef PUSH_FAILURE
1360
1361int re_match(bufp, string, size, pos, regs)
1362regexp_t bufp;
1363char *string;
1364int size, pos;
1365regexp_registers_t regs;
1366{
1367 return re_match_2(bufp, string, size, (char *)NULL, 0, pos, regs, size);
1368}
1369
1370int re_search_2(bufp, string1, size1, string2, size2, pos, range, regs,
1371 mstop)
1372regexp_t bufp;
1373char *string1, *string2;
1374int size1, size2, pos, range, mstop;
1375regexp_registers_t regs;
1376{
1377 char *fastmap, *translate, *text, *partstart, *partend;
1378 int dir, ret;
1379 char anchor;
1380
1381 assert(size1 >= 0 && size2 >= 0 && pos >= 0 && mstop >= 0);
1382 assert(pos + range + 1 >= 0 && pos + range - 1 <= size1 + size2);
1383 assert(pos <= mstop);
1384
1385 fastmap = bufp->fastmap;
1386 translate = bufp->translate;
1387 if (fastmap && !bufp->fastmap_accurate)
1388 re_compile_fastmap(bufp);
1389 anchor = bufp->anchor;
1390 if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
1391 fastmap = NULL;
1392 if (range < 0)
1393 {
1394 dir = -1;
1395 range = -range;
1396 }
1397 else
1398 dir = 1;
1399 if (anchor == 2)
1400 if (pos != 0)
1401 return -1;
1402 else
1403 range = 0;
1404 for (; range >= 0; range--, pos += dir)
1405 {
1406 if (fastmap)
1407 {
1408 if (dir == 1)
1409 { /* searching forwards */
1410 if (pos < size1)
1411 {
1412 text = string1 + pos;
1413 if (pos + range > size1)
1414 partend = string1 + size1;
1415 else
1416 partend = string1 + pos + range;
1417 }
1418 else
1419 {
1420 text = string2 + pos - size1;
1421 partend = string2 + pos + range - size1;
1422 }
1423 partstart = text;
1424 if (translate)
1425 while (text != partend &&
1426 !fastmap[(unsigned char)
1427 translate[(unsigned char)*text]])
1428 text++;
1429 else
1430 while (text != partend && !fastmap[(unsigned char)*text])
1431 text++;
1432 pos += text - partstart;
1433 range -= text - partstart;
1434 if (pos == size1 + size2 && bufp->can_be_null == 0)
1435 return -1;
1436 }
1437 else
1438 { /* searching backwards */
1439 if (pos <= size1)
1440 {
1441 text = string1 + pos;
1442 partstart = string1 + pos - range;
1443 }
1444 else
1445 {
1446 text = string2 + pos - size1;
1447 if (range < pos - size1)
1448 partstart = string2 + pos - size1 - range;
1449 else
1450 partstart = string2;
1451 }
1452 partend = text;
1453 if (translate)
1454 while (text != partstart &&
1455 !fastmap[(unsigned char)
1456 translate[(unsigned char)*text]])
1457 text--;
1458 else
1459 while (text != partstart &&
1460 !fastmap[(unsigned char)*text])
1461 text--;
1462 pos -= partend - text;
1463 range -= partend - text;
1464 }
1465 }
1466 if (anchor == 1)
1467 { /* anchored to begline */
1468 if (pos > 0 &&
1469 (pos <= size1 ? string1[pos - 1] :
1470 string2[pos - size1 - 1]) != '\n')
1471 continue;
1472 }
1473 assert(pos >= 0 && pos <= size1 + size2);
1474 ret = re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop);
1475 if (ret >= 0)
1476 return pos;
1477 if (ret == -2)
1478 return -2;
1479 }
1480 return -1;
1481}
1482
1483int re_search(bufp, string, size, startpos, range, regs)
1484regexp_t bufp;
1485char *string;
1486int size, startpos, range;
1487regexp_registers_t regs;
1488{
1489 return re_search_2(bufp, string, size, (char *)NULL, 0,
1490 startpos, range, regs, size);
1491}
1492
1493static struct re_pattern_buffer re_comp_buf;
1494
1495char *re_comp(s)
1496char *s;
1497{
1498 if (s == NULL)
1499 {
1500 if (!re_comp_buf.buffer)
1501 return "Out of memory";
1502 return NULL;
1503 }
1504 if (!re_comp_buf.buffer)
1505 {
1506 /* the buffer will be allocated automatically */
1507 re_comp_buf.fastmap = malloc(256);
1508 re_comp_buf.translate = NULL;
1509 }
1510 return re_compile_pattern(s, strlen(s), &re_comp_buf);
1511}
1512
1513int re_exec(s)
1514char *s;
1515{
1516 int len = strlen(s);
1517
1518 return re_search(&re_comp_buf, s, len, 0, len, (regexp_registers_t)NULL) >= 0;
1519}
1520
1521#ifdef TEST_REGEXP
1522
1523int main()
1524{
1525 char buf[500];
1526 char *cp;
1527 struct re_pattern_buffer exp;
1528 struct re_registers regs;
1529 int a,pos;
1530 char fastmap[256];
1531
1532 exp.allocated = 0;
1533 exp.buffer = 0;
1534 exp.translate = NULL;
1535 exp.fastmap = fastmap;
1536
1537 /* re_set_syntax(RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_ANSI_HEX); */
1538
1539 while (1)
1540 {
1541 printf("Enter regexp:\n");
1542 gets(buf);
1543 cp=re_compile_pattern(buf, strlen(buf), &exp);
1544 if (cp)
1545 {
1546 printf("Error: %s\n", cp);
1547 continue;
1548 }
1549 re_compile_fastmap(&exp);
1550 printf("dump:\n");
1551 for (pos = 0; pos < exp.used;)
1552 {
1553 printf("%d: ", pos);
1554 switch (exp.buffer[pos++])
1555 {
1556 case Cend:
1557 strcpy(buf, "end");
1558 break;
1559 case Cbol:
1560 strcpy(buf, "bol");
1561 break;
1562 case Ceol:
1563 strcpy(buf, "eol");
1564 break;
1565 case Cset:
1566 strcpy(buf, "set ");
1567 for (a = 0; a < 256/8; a++)
1568 sprintf(buf+strlen(buf)," %02x",
1569 (unsigned char)exp.buffer[pos++]);
1570 break;
1571 case Cexact:
1572 sprintf(buf, "exact '%c' 0x%x", exp.buffer[pos],
1573 (unsigned char)exp.buffer[pos]);
1574 pos++;
1575 break;
1576 case Canychar:
1577 strcpy(buf, "anychar");
1578 break;
1579 case Cstart_memory:
1580 sprintf(buf, "start_memory %d", exp.buffer[pos++]);
1581 break;
1582 case Cend_memory:
1583 sprintf(buf, "end_memory %d", exp.buffer[pos++]);
1584 break;
1585 case Cmatch_memory:
1586 sprintf(buf, "match_memory %d", exp.buffer[pos++]);
1587 break;
1588 case Cjump:
1589 case Cdummy_failure_jump:
1590 case Cstar_jump:
1591 case Cfailure_jump:
1592 case Cupdate_failure_jump:
1593 a = (unsigned char)exp.buffer[pos++];
1594 a += (unsigned char)exp.buffer[pos++] << 8;
1595 a = (int)(short)a;
1596 switch (exp.buffer[pos-3])
1597 {
1598 case Cjump:
1599 cp = "jump";
1600 break;
1601 case Cstar_jump:
1602 cp = "star_jump";
1603 break;
1604 case Cfailure_jump:
1605 cp = "failure_jump";
1606 break;
1607 case Cupdate_failure_jump:
1608 cp = "update_failure_jump";
1609 break;
1610 case Cdummy_failure_jump:
1611 cp = "dummy_failure_jump";
1612 break;
1613 default:
1614 cp = "unknown jump";
1615 break;
1616 }
1617 sprintf(buf, "%s %d", cp, a + pos);
1618 break;
1619 case Cbegbuf:
1620 strcpy(buf,"begbuf");
1621 break;
1622 case Cendbuf:
1623 strcpy(buf,"endbuf");
1624 break;
1625 case Cwordbeg:
1626 strcpy(buf,"wordbeg");
1627 break;
1628 case Cwordend:
1629 strcpy(buf,"wordend");
1630 break;
1631 case Cwordbound:
1632 strcpy(buf,"wordbound");
1633 break;
1634 case Cnotwordbound:
1635 strcpy(buf,"notwordbound");
1636 break;
1637 default:
1638 sprintf(buf, "unknown code %d",
1639 (unsigned char)exp.buffer[pos - 1]);
1640 break;
1641 }
1642 printf("%s\n", buf);
1643 }
1644 printf("can_be_null = %d uses_registers = %d anchor = %d\n",
1645 exp.can_be_null, exp.uses_registers, exp.anchor);
1646
1647 printf("fastmap:");
1648 for (a = 0; a < 256; a++)
1649 if (exp.fastmap[a])
1650 printf(" %d", a);
1651 printf("\n");
1652 printf("Enter strings. An empty line terminates.\n");
1653 while (fgets(buf, sizeof(buf), stdin))
1654 {
1655 if (buf[0] == '\n')
1656 break;
1657 a = re_search(&exp, buf, strlen(buf), 0, strlen(buf), &regs);
1658 printf("search returns %d\n", a);
1659 if (a != -1)
1660 {
1661 for (a = 0; a < RE_NREGS; a++)
1662 {
1663 printf("buf %d: %d to %d\n", a, regs.start[a], regs.end[a]);
1664 }
1665 }
1666 }
1667 }
1668}
1669
1670#endif /* TEST_REGEXP */