blob: 3d4a9bc7c0f70799cefb3ea6a1cf3bb78d916e42 [file] [log] [blame]
Tanguy Pruvot36efc942011-11-20 14:41:41 +01001/* Extended regular expression matching and search library, version
2 0.12. (Implements POSIX draft P10003.2/D11.2, except for
3 internationalization features.)
4
5 Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20 USA. */
21
22/* AIX requires this to be the first thing in the file. */
23#if defined (_AIX) && !defined (REGEX_MALLOC)
24 #pragma alloca
25#endif
26
27#undef _GNU_SOURCE
28#define _GNU_SOURCE
29
30#include "cs_config.h"
31
32#define os_random random
33#define HAVE_PTHREAD 1
34
35#ifdef HAVE_CONFIG_H
36#include <config.h>
37#endif
38
39/* We need this for `regex.h', and perhaps for the Emacs include files. */
40#include <sys/types.h>
41
42/* This is for other GNU distributions with internationalized messages. */
43#if HAVE_LIBINTL_H || defined (_LIBC)
44# include <libintl.h>
45#else
46# define gettext(msgid) (msgid)
47#endif
48
49#ifndef gettext_noop
50/* This define is so xgettext can find the internationalizable
51 strings. */
52#define gettext_noop(String) String
53#endif
54
55/* The `emacs' switch turns on certain matching commands
56 that make sense only in Emacs. */
57#ifdef emacs
58
59#include "lisp.h"
60#include "buffer.h"
61#include "syntax.h"
62
63#else /* not emacs */
64
65/* If we are not linking with Emacs proper,
66 we can't use the relocating allocator
67 even if config.h says that we can. */
68#undef REL_ALLOC
69
70#if defined (STDC_HEADERS) || defined (_LIBC)
71#include <stdlib.h>
72#else
73char *malloc ();
74char *realloc ();
75#endif
76
77/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
78 If nothing else has been done, use the method below. */
79#ifdef INHIBIT_STRING_HEADER
80#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
81#if !defined (bzero) && !defined (bcopy)
82#undef INHIBIT_STRING_HEADER
83#endif
84#endif
85#endif
86
87/* This is the normal way of making sure we have a bcopy and a bzero.
88 This is used in most programs--a few other programs avoid this
89 by defining INHIBIT_STRING_HEADER. */
90#ifndef INHIBIT_STRING_HEADER
91#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
92#include <string.h>
93#ifndef bcmp
94#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
95#endif
96#ifndef bcopy
97#define bcopy(s, d, n) memcpy ((d), (s), (n))
98#endif
99#ifndef bzero
100#define bzero(s, n) memset ((s), 0, (n))
101#endif
102#else
103#include <strings.h>
104#endif
105#endif
106
107/* Define the syntax stuff for \<, \>, etc. */
108
109/* This must be nonzero for the wordchar and notwordchar pattern
110 commands in re_match_2. */
111#ifndef Sword
112#define Sword 1
113#endif
114
115#ifdef SWITCH_ENUM_BUG
116#define SWITCH_ENUM_CAST(x) ((int)(x))
117#else
118#define SWITCH_ENUM_CAST(x) (x)
119#endif
120
121#ifdef SYNTAX_TABLE
122
123extern char *re_syntax_table;
124
125#else /* not SYNTAX_TABLE */
126
127/* How many characters in the character set. */
128#define CHAR_SET_SIZE 256
129
130static char re_syntax_table[CHAR_SET_SIZE];
131
132static void
133init_syntax_once ()
134{
135 register int c;
136 static int done = 0;
137
138 if (done)
139 return;
140
141 bzero (re_syntax_table, sizeof re_syntax_table);
142
143 for (c = 'a'; c <= 'z'; c++)
144 re_syntax_table[c] = Sword;
145
146 for (c = 'A'; c <= 'Z'; c++)
147 re_syntax_table[c] = Sword;
148
149 for (c = '0'; c <= '9'; c++)
150 re_syntax_table[c] = Sword;
151
152 re_syntax_table['_'] = Sword;
153
154 done = 1;
155}
156
157#endif /* not SYNTAX_TABLE */
158
159#define SYNTAX(c) re_syntax_table[c]
160
161#endif /* not emacs */
162
163/* Get the interface, including the syntax bits. */
164#include "regex.h"
165
166/* isalpha etc. are used for the character classes. */
167#include <ctype.h>
168
169/* Jim Meyering writes:
170
171 "... Some ctype macros are valid only for character codes that
172 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
173 using /bin/cc or gcc but without giving an ansi option). So, all
174 ctype uses should be through macros like ISPRINT... If
175 STDC_HEADERS is defined, then autoconf has verified that the ctype
176 macros don't need to be guarded with references to isascii. ...
177 Defining IN_CTYPE_DOMAIN to 1 should let any compiler worth its salt
178 eliminate the && through constant folding." */
179
180#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
181#define IN_CTYPE_DOMAIN(c) 1
182#else
183#define IN_CTYPE_DOMAIN(c) isascii(c)
184#endif
185
186#ifdef isblank
187#define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c))
188#else
189#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
190#endif
191#ifdef isgraph
192#define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c))
193#else
194#define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c))
195#endif
196
197#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
198#define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c))
199#define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c))
200#define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c))
201#define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c))
202#define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c))
203#define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c))
204#define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c))
205#define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c))
206#define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c))
207
208#ifndef NULL
209#define NULL (void *)0
210#endif
211
212/* We remove any previous definition of `SIGN_EXTEND_CHAR',
213 since ours (we hope) works properly with all combinations of
214 machines, compilers, `char' and `unsigned char' argument types.
215 (Per Bothner suggested the basic approach.) */
216#undef SIGN_EXTEND_CHAR
217#if __STDC__
218#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
219#else /* not __STDC__ */
220/* As in Harbison and Steele. */
221#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
222#endif
223
224/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
225 use `alloca' instead of `malloc'. This is because using malloc in
226 re_search* or re_match* could cause memory leaks when C-g is used in
227 Emacs; also, malloc is slower and causes storage fragmentation. On
228 the other hand, malloc is more portable, and easier to debug.
229
230 Because we sometimes use alloca, some routines have to be macros,
231 not functions -- `alloca'-allocated space disappears at the end of the
232 function it is called in. */
233
234#ifdef REGEX_MALLOC
235
236#define REGEX_ALLOCATE malloc
237#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
238#define REGEX_FREE free
239
240#else /* not REGEX_MALLOC */
241
242/* Emacs already defines alloca, sometimes. */
243#ifndef alloca
244
245/* Make alloca work the best possible way. */
246#ifdef __GNUC__
247#define alloca __builtin_alloca
248#else /* not __GNUC__ */
249#if HAVE_ALLOCA_H
250#include <alloca.h>
251#else /* not __GNUC__ or HAVE_ALLOCA_H */
252#if 0 /* It is a bad idea to declare alloca. We always cast the result. */
253#ifndef _AIX /* Already did AIX, up at the top. */
254char *alloca ();
255#endif /* not _AIX */
256#endif
257#endif /* not HAVE_ALLOCA_H */
258#endif /* not __GNUC__ */
259
260#endif /* not alloca */
261
262#define REGEX_ALLOCATE alloca
263
264/* Assumes a `char *destination' variable. */
265#define REGEX_REALLOCATE(source, osize, nsize) \
266 (destination = (char *) alloca (nsize), \
267 bcopy (source, destination, osize), \
268 destination)
269
270/* No need to do anything to free, after alloca. */
271#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
272
273#endif /* not REGEX_MALLOC */
274
275/* Define how to allocate the failure stack. */
276
277#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
278
279#define REGEX_ALLOCATE_STACK(size) \
280 r_alloc (&failure_stack_ptr, (size))
281#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
282 r_re_alloc (&failure_stack_ptr, (nsize))
283#define REGEX_FREE_STACK(ptr) \
284 r_alloc_free (&failure_stack_ptr)
285
286#else /* not using relocating allocator */
287
288#ifdef REGEX_MALLOC
289
290#define REGEX_ALLOCATE_STACK malloc
291#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
292#define REGEX_FREE_STACK free
293
294#else /* not REGEX_MALLOC */
295
296#define REGEX_ALLOCATE_STACK alloca
297
298#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
299 REGEX_REALLOCATE (source, osize, nsize)
300/* No need to explicitly free anything. */
301#define REGEX_FREE_STACK(arg)
302
303#endif /* not REGEX_MALLOC */
304#endif /* not using relocating allocator */
305
306
307/* True if `size1' is non-NULL and PTR is pointing anywhere inside
308 `string1' or just past its end. This works if PTR is NULL, which is
309 a good thing. */
310#define FIRST_STRING_P(ptr) \
311 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
312
313/* (Re)Allocate N items of type T using malloc, or fail. */
314#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
315#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
316#define RETALLOC_IF(addr, n, t) \
317 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
318#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
319
320#define BYTEWIDTH 8 /* In bits. */
321
322#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
323
324#undef MAX
325#undef MIN
326#define MAX(a, b) ((a) > (b) ? (a) : (b))
327#define MIN(a, b) ((a) < (b) ? (a) : (b))
328
329typedef char boolean;
330#define false 0
331#define true 1
332
333static int re_match_2_internal ();
334
335/* These are the command codes that appear in compiled regular
336 expressions. Some opcodes are followed by argument bytes. A
337 command code can specify any interpretation whatsoever for its
338 arguments. Zero bytes may appear in the compiled regular expression. */
339
340typedef enum
341{
342 no_op = 0,
343
344 /* Succeed right away--no more backtracking. */
345 succeed,
346
347 /* Followed by one byte giving n, then by n literal bytes. */
348 exactn,
349
350 /* Matches any (more or less) character. */
351 anychar,
352
353 /* Matches any one char belonging to specified set. First
354 following byte is number of bitmap bytes. Then come bytes
355 for a bitmap saying which chars are in. Bits in each byte
356 are ordered low-bit-first. A character is in the set if its
357 bit is 1. A character too large to have a bit in the map is
358 automatically not in the set. */
359 charset,
360
361 /* Same parameters as charset, but match any character that is
362 not one of those specified. */
363 charset_not,
364
365 /* Start remembering the text that is matched, for storing in a
366 register. Followed by one byte with the register number, in
367 the range 0 to one less than the pattern buffer's re_nsub
368 field. Then followed by one byte with the number of groups
369 inner to this one. (This last has to be part of the
370 start_memory only because we need it in the on_failure_jump
371 of re_match_2.) */
372 start_memory,
373
374 /* Stop remembering the text that is matched and store it in a
375 memory register. Followed by one byte with the register
376 number, in the range 0 to one less than `re_nsub' in the
377 pattern buffer, and one byte with the number of inner groups,
378 just like `start_memory'. (We need the number of inner
379 groups here because we don't have any easy way of finding the
380 corresponding start_memory when we're at a stop_memory.) */
381 stop_memory,
382
383 /* Match a duplicate of something remembered. Followed by one
384 byte containing the register number. */
385 duplicate,
386
387 /* Fail unless at beginning of line. */
388 begline,
389
390 /* Fail unless at end of line. */
391 endline,
392
393 /* Succeeds if at beginning of buffer (if emacs) or at beginning
394 of string to be matched (if not). */
395 begbuf,
396
397 /* Analogously, for end of buffer/string. */
398 endbuf,
399
400 /* Followed by two byte relative address to which to jump. */
401 jump,
402
403 /* Same as jump, but marks the end of an alternative. */
404 jump_past_alt,
405
406 /* Followed by two-byte relative address of place to resume at
407 in case of failure. */
408 on_failure_jump,
409
410 /* Like on_failure_jump, but pushes a placeholder instead of the
411 current string position when executed. */
412 on_failure_keep_string_jump,
413
414 /* Throw away latest failure point and then jump to following
415 two-byte relative address. */
416 pop_failure_jump,
417
418 /* Change to pop_failure_jump if know won't have to backtrack to
419 match; otherwise change to jump. This is used to jump
420 back to the beginning of a repeat. If what follows this jump
421 clearly won't match what the repeat does, such that we can be
422 sure that there is no use backtracking out of repetitions
423 already matched, then we change it to a pop_failure_jump.
424 Followed by two-byte address. */
425 maybe_pop_jump,
426
427 /* Jump to following two-byte address, and push a dummy failure
428 point. This failure point will be thrown away if an attempt
429 is made to use it for a failure. A `+' construct makes this
430 before the first repeat. Also used as an intermediary kind
431 of jump when compiling an alternative. */
432 dummy_failure_jump,
433
434 /* Push a dummy failure point and continue. Used at the end of
435 alternatives. */
436 push_dummy_failure,
437
438 /* Followed by two-byte relative address and two-byte number n.
439 After matching N times, jump to the address upon failure. */
440 succeed_n,
441
442 /* Followed by two-byte relative address, and two-byte number n.
443 Jump to the address N times, then fail. */
444 jump_n,
445
446 /* Set the following two-byte relative address to the
447 subsequent two-byte number. The address *includes* the two
448 bytes of number. */
449 set_number_at,
450
451 wordchar, /* Matches any word-constituent character. */
452 notwordchar, /* Matches any char that is not a word-constituent. */
453
454 wordbeg, /* Succeeds if at word beginning. */
455 wordend, /* Succeeds if at word end. */
456
457 wordbound, /* Succeeds if at a word boundary. */
458 notwordbound /* Succeeds if not at a word boundary. */
459
460#ifdef emacs
461 ,before_dot, /* Succeeds if before point. */
462 at_dot, /* Succeeds if at point. */
463 after_dot, /* Succeeds if after point. */
464
465 /* Matches any character whose syntax is specified. Followed by
466 a byte which contains a syntax code, e.g., Sword. */
467 syntaxspec,
468
469 /* Matches any character whose syntax is not that specified. */
470 notsyntaxspec
471#endif /* emacs */
472} re_opcode_t;
473
474/* Common operations on the compiled pattern. */
475
476/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
477
478#define STORE_NUMBER(destination, number) \
479 do { \
480 (destination)[0] = (number) & 0377; \
481 (destination)[1] = (number) >> 8; \
482 } while (0)
483
484/* Same as STORE_NUMBER, except increment DESTINATION to
485 the byte after where the number is stored. Therefore, DESTINATION
486 must be an lvalue. */
487
488#define STORE_NUMBER_AND_INCR(destination, number) \
489 do { \
490 STORE_NUMBER (destination, number); \
491 (destination) += 2; \
492 } while (0)
493
494/* Put into DESTINATION a number stored in two contiguous bytes starting
495 at SOURCE. */
496
497#define EXTRACT_NUMBER(destination, source) \
498 do { \
499 (destination) = *(source) & 0377; \
500 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
501 } while (0)
502
503#ifdef DEBUG
504static void
505extract_number (dest, source)
506 int *dest;
507 unsigned char *source;
508{
509 int temp = SIGN_EXTEND_CHAR (*(source + 1));
510 *dest = *source & 0377;
511 *dest += temp << 8;
512}
513
514#ifndef EXTRACT_MACROS /* To debug the macros. */
515#undef EXTRACT_NUMBER
516#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
517#endif /* not EXTRACT_MACROS */
518
519#endif /* DEBUG */
520
521/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
522 SOURCE must be an lvalue. */
523
524#define EXTRACT_NUMBER_AND_INCR(destination, source) \
525 do { \
526 EXTRACT_NUMBER (destination, source); \
527 (source) += 2; \
528 } while (0)
529
530#ifdef DEBUG
531static void
532extract_number_and_incr (destination, source)
533 int *destination;
534 unsigned char **source;
535{
536 extract_number (destination, *source);
537 *source += 2;
538}
539
540#ifndef EXTRACT_MACROS
541#undef EXTRACT_NUMBER_AND_INCR
542#define EXTRACT_NUMBER_AND_INCR(dest, src) \
543 extract_number_and_incr (&dest, &src)
544#endif /* not EXTRACT_MACROS */
545
546#endif /* DEBUG */
547
548/* If DEBUG is defined, Regex prints many voluminous messages about what
549 it is doing (if the variable `debug' is nonzero). If linked with the
550 main program in `iregex.c', you can enter patterns and strings
551 interactively. And if linked with the main program in `main.c' and
552 the other test files, you can run the already-written tests. */
553
554#ifdef DEBUG
555
556/* We use standard I/O for debugging. */
557#include <stdio.h>
558
559/* It is useful to test things that ``must'' be true when debugging. */
560#include <assert.h>
561
562static int debug = 0;
563
564#define DEBUG_STATEMENT(e) e
565#define DEBUG_PRINT1(x) if (debug) printf (x)
566#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
567#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
568#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
569#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
570 if (debug) print_partial_compiled_pattern (s, e)
571#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
572 if (debug) print_double_string (w, s1, sz1, s2, sz2)
573
574
575/* Print the fastmap in human-readable form. */
576
577void
578print_fastmap (fastmap)
579 char *fastmap;
580{
581 unsigned was_a_range = 0;
582 unsigned i = 0;
583
584 while (i < (1 << BYTEWIDTH))
585 {
586 if (fastmap[i++])
587 {
588 was_a_range = 0;
589 putchar (i - 1);
590 while (i < (1 << BYTEWIDTH) && fastmap[i])
591 {
592 was_a_range = 1;
593 i++;
594 }
595 if (was_a_range)
596 {
597 printf ("-");
598 putchar (i - 1);
599 }
600 }
601 }
602 putchar ('\n');
603}
604
605
606/* Print a compiled pattern string in human-readable form, starting at
607 the START pointer into it and ending just before the pointer END. */
608
609void
610print_partial_compiled_pattern (start, end)
611 unsigned char *start;
612 unsigned char *end;
613{
614 int mcnt, mcnt2;
615 unsigned char *p = start;
616 unsigned char *pend = end;
617
618 if (start == NULL)
619 {
620 printf ("(null)\n");
621 return;
622 }
623
624 /* Loop over pattern commands. */
625 while (p < pend)
626 {
627 printf ("%d:\t", p - start);
628
629 switch ((re_opcode_t) *p++)
630 {
631 case no_op:
632 printf ("/no_op");
633 break;
634
635 case exactn:
636 mcnt = *p++;
637 printf ("/exactn/%d", mcnt);
638 do
639 {
640 putchar ('/');
641 putchar (*p++);
642 }
643 while (--mcnt);
644 break;
645
646 case start_memory:
647 mcnt = *p++;
648 printf ("/start_memory/%d/%d", mcnt, *p++);
649 break;
650
651 case stop_memory:
652 mcnt = *p++;
653 printf ("/stop_memory/%d/%d", mcnt, *p++);
654 break;
655
656 case duplicate:
657 printf ("/duplicate/%d", *p++);
658 break;
659
660 case anychar:
661 printf ("/anychar");
662 break;
663
664 case charset:
665 case charset_not:
666 {
667 register int c, last = -100;
668 register int in_range = 0;
669
670 printf ("/charset [%s",
671 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
672
673 assert (p + *p < pend);
674
675 for (c = 0; c < 256; c++)
676 if (c / 8 < *p
677 && (p[1 + (c/8)] & (1 << (c % 8))))
678 {
679 /* Are we starting a range? */
680 if (last + 1 == c && ! in_range)
681 {
682 putchar ('-');
683 in_range = 1;
684 }
685 /* Have we broken a range? */
686 else if (last + 1 != c && in_range)
687 {
688 putchar (last);
689 in_range = 0;
690 }
691
692 if (! in_range)
693 putchar (c);
694
695 last = c;
696 }
697
698 if (in_range)
699 putchar (last);
700
701 putchar (']');
702
703 p += 1 + *p;
704 }
705 break;
706
707 case begline:
708 printf ("/begline");
709 break;
710
711 case endline:
712 printf ("/endline");
713 break;
714
715 case on_failure_jump:
716 extract_number_and_incr (&mcnt, &p);
717 printf ("/on_failure_jump to %d", p + mcnt - start);
718 break;
719
720 case on_failure_keep_string_jump:
721 extract_number_and_incr (&mcnt, &p);
722 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
723 break;
724
725 case dummy_failure_jump:
726 extract_number_and_incr (&mcnt, &p);
727 printf ("/dummy_failure_jump to %d", p + mcnt - start);
728 break;
729
730 case push_dummy_failure:
731 printf ("/push_dummy_failure");
732 break;
733
734 case maybe_pop_jump:
735 extract_number_and_incr (&mcnt, &p);
736 printf ("/maybe_pop_jump to %d", p + mcnt - start);
737 break;
738
739 case pop_failure_jump:
740 extract_number_and_incr (&mcnt, &p);
741 printf ("/pop_failure_jump to %d", p + mcnt - start);
742 break;
743
744 case jump_past_alt:
745 extract_number_and_incr (&mcnt, &p);
746 printf ("/jump_past_alt to %d", p + mcnt - start);
747 break;
748
749 case jump:
750 extract_number_and_incr (&mcnt, &p);
751 printf ("/jump to %d", p + mcnt - start);
752 break;
753
754 case succeed_n:
755 extract_number_and_incr (&mcnt, &p);
756 extract_number_and_incr (&mcnt2, &p);
757 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
758 break;
759
760 case jump_n:
761 extract_number_and_incr (&mcnt, &p);
762 extract_number_and_incr (&mcnt2, &p);
763 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
764 break;
765
766 case set_number_at:
767 extract_number_and_incr (&mcnt, &p);
768 extract_number_and_incr (&mcnt2, &p);
769 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
770 break;
771
772 case wordbound:
773 printf ("/wordbound");
774 break;
775
776 case notwordbound:
777 printf ("/notwordbound");
778 break;
779
780 case wordbeg:
781 printf ("/wordbeg");
782 break;
783
784 case wordend:
785 printf ("/wordend");
786
787#ifdef emacs
788 case before_dot:
789 printf ("/before_dot");
790 break;
791
792 case at_dot:
793 printf ("/at_dot");
794 break;
795
796 case after_dot:
797 printf ("/after_dot");
798 break;
799
800 case syntaxspec:
801 printf ("/syntaxspec");
802 mcnt = *p++;
803 printf ("/%d", mcnt);
804 break;
805
806 case notsyntaxspec:
807 printf ("/notsyntaxspec");
808 mcnt = *p++;
809 printf ("/%d", mcnt);
810 break;
811#endif /* emacs */
812
813 case wordchar:
814 printf ("/wordchar");
815 break;
816
817 case notwordchar:
818 printf ("/notwordchar");
819 break;
820
821 case begbuf:
822 printf ("/begbuf");
823 break;
824
825 case endbuf:
826 printf ("/endbuf");
827 break;
828
829 default:
830 printf ("?%d", *(p-1));
831 }
832
833 putchar ('\n');
834 }
835
836 printf ("%d:\tend of pattern.\n", p - start);
837}
838
839
840void
841print_compiled_pattern (bufp)
842 struct re_pattern_buffer *bufp;
843{
844 unsigned char *buffer = bufp->buffer;
845
846 print_partial_compiled_pattern (buffer, buffer + bufp->used);
847 printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
848
849 if (bufp->fastmap_accurate && bufp->fastmap)
850 {
851 printf ("fastmap: ");
852 print_fastmap (bufp->fastmap);
853 }
854
855 printf ("re_nsub: %d\t", bufp->re_nsub);
856 printf ("regs_alloc: %d\t", bufp->regs_allocated);
857 printf ("can_be_null: %d\t", bufp->can_be_null);
858 printf ("newline_anchor: %d\n", bufp->newline_anchor);
859 printf ("no_sub: %d\t", bufp->no_sub);
860 printf ("not_bol: %d\t", bufp->not_bol);
861 printf ("not_eol: %d\t", bufp->not_eol);
862 printf ("syntax: %d\n", bufp->syntax);
863 /* Perhaps we should print the translate table? */
864}
865
866
867void
868print_double_string (where, string1, size1, string2, size2)
869 const char *where;
870 const char *string1;
871 const char *string2;
872 int size1;
873 int size2;
874{
875 unsigned this_char;
876
877 if (where == NULL)
878 printf ("(null)");
879 else
880 {
881 if (FIRST_STRING_P (where))
882 {
883 for (this_char = where - string1; this_char < size1; this_char++)
884 putchar (string1[this_char]);
885
886 where = string2;
887 }
888
889 for (this_char = where - string2; this_char < size2; this_char++)
890 putchar (string2[this_char]);
891 }
892}
893
894#else /* not DEBUG */
895
896#undef assert
897#define assert(e)
898
899#define DEBUG_STATEMENT(e)
900#define DEBUG_PRINT1(x)
901#define DEBUG_PRINT2(x1, x2)
902#define DEBUG_PRINT3(x1, x2, x3)
903#define DEBUG_PRINT4(x1, x2, x3, x4)
904#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
905#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
906
907#endif /* not DEBUG */
908
909/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
910 also be assigned to arbitrarily: each pattern buffer stores its own
911 syntax, so it can be changed between regex compilations. */
912/* This has no initializer because initialized variables in Emacs
913 become read-only after dumping. */
914reg_syntax_t re_syntax_options;
915
916
917/* Specify the precise syntax of regexps for compilation. This provides
918 for compatibility for various utilities which historically have
919 different, incompatible syntaxes.
920
921 The argument SYNTAX is a bit mask comprised of the various bits
922 defined in regex.h. We return the old syntax. */
923
924reg_syntax_t
925re_set_syntax (syntax)
926 reg_syntax_t syntax;
927{
928 reg_syntax_t ret = re_syntax_options;
929
930 re_syntax_options = syntax;
931 return ret;
932}
933
934/* This table gives an error message for each of the error codes listed
935 in regex.h. Obviously the order here has to be same as there.
936 POSIX doesn't require that we do anything for REG_NOERROR,
937 but why not be nice? */
938
939static const char *re_error_msgid[] =
940 {
941 gettext_noop ("Success"), /* REG_NOERROR */
942 gettext_noop ("No match"), /* REG_NOMATCH */
943 gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
944 gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
945 gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
946 gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
947 gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
948 gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
949 gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
950 gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
951 gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
952 gettext_noop ("Invalid range end"), /* REG_ERANGE */
953 gettext_noop ("Memory exhausted"), /* REG_ESPACE */
954 gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
955 gettext_noop ("Premature end of regular expression"), /* REG_EEND */
956 gettext_noop ("Regular expression too big"), /* REG_ESIZE */
957 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
958 };
959
960/* Avoiding alloca during matching, to placate r_alloc. */
961
962/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
963 searching and matching functions should not call alloca. On some
964 systems, alloca is implemented in terms of malloc, and if we're
965 using the relocating allocator routines, then malloc could cause a
966 relocation, which might (if the strings being searched are in the
967 ralloc heap) shift the data out from underneath the regexp
968 routines.
969
970 Here's another reason to avoid allocation: Emacs
971 processes input from X in a signal handler; processing X input may
972 call malloc; if input arrives while a matching routine is calling
973 malloc, then we're scrod. But Emacs can't just block input while
974 calling matching routines; then we don't notice interrupts when
975 they come in. So, Emacs blocks input around all regexp calls
976 except the matching calls, which it leaves unprotected, in the
977 faith that they will not malloc. */
978
979/* Normally, this is fine. */
980#define MATCH_MAY_ALLOCATE
981
982/* When using GNU C, we are not REALLY using the C alloca, no matter
983 what config.h may say. So don't take precautions for it. */
984#ifdef __GNUC__
985#undef C_ALLOCA
986#endif
987
988/* The match routines may not allocate if (1) they would do it with malloc
989 and (2) it's not safe for them to use malloc.
990 Note that if REL_ALLOC is defined, matching would not use malloc for the
991 failure stack, but we would still use it for the register vectors;
992 so REL_ALLOC should not affect this. */
993#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
994#undef MATCH_MAY_ALLOCATE
995#endif
996
997
998/* Failure stack declarations and macros; both re_compile_fastmap and
999 re_match_2 use a failure stack. These have to be macros because of
1000 REGEX_ALLOCATE_STACK. */
1001
1002
1003/* Number of failure points for which to initially allocate space
1004 when matching. If this number is exceeded, we allocate more
1005 space, so it is not a hard limit. */
1006#ifndef INIT_FAILURE_ALLOC
1007#define INIT_FAILURE_ALLOC 5
1008#endif
1009
1010/* Roughly the maximum number of failure points on the stack. Would be
1011 exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1012 This is a variable only so users of regex can assign to it; we never
1013 change it ourselves. */
1014#if defined (MATCH_MAY_ALLOCATE)
1015/* 4400 was enough to cause a crash on Alpha OSF/1,
1016 whose default stack limit is 2mb. */
1017int re_max_failures = 20000;
1018#else
1019int re_max_failures = 2000;
1020#endif
1021
1022union fail_stack_elt
1023{
1024 unsigned char *pointer;
1025 int integer;
1026};
1027
1028typedef union fail_stack_elt fail_stack_elt_t;
1029
1030typedef struct
1031{
1032 fail_stack_elt_t *stack;
1033 unsigned size;
1034 unsigned avail; /* Offset of next open position. */
1035} fail_stack_type;
1036
1037#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
1038#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1039#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1040
1041
1042/* Define macros to initialize and free the failure stack.
1043 Do `return -2' if the alloc fails. */
1044
1045#ifdef MATCH_MAY_ALLOCATE
1046#define INIT_FAIL_STACK() \
1047 do { \
1048 fail_stack.stack = (fail_stack_elt_t *) \
1049 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1050 \
1051 if (fail_stack.stack == NULL) \
1052 return -2; \
1053 \
1054 fail_stack.size = INIT_FAILURE_ALLOC; \
1055 fail_stack.avail = 0; \
1056 } while (0)
1057
1058#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
1059#else
1060#define INIT_FAIL_STACK() \
1061 do { \
1062 fail_stack.avail = 0; \
1063 } while (0)
1064
1065#define RESET_FAIL_STACK()
1066#endif
1067
1068
1069/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1070
1071 Return 1 if succeeds, and 0 if either ran out of memory
1072 allocating space for it or it was already too large.
1073
1074 REGEX_REALLOCATE_STACK requires `destination' be declared. */
1075
1076#define DOUBLE_FAIL_STACK(fail_stack) \
1077 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
1078 ? 0 \
1079 : ((fail_stack).stack = (fail_stack_elt_t *) \
1080 REGEX_REALLOCATE_STACK ((fail_stack).stack, \
1081 (fail_stack).size * sizeof (fail_stack_elt_t), \
1082 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
1083 \
1084 (fail_stack).stack == NULL \
1085 ? 0 \
1086 : ((fail_stack).size <<= 1, \
1087 1)))
1088
1089
1090/* Push pointer POINTER on FAIL_STACK.
1091 Return 1 if was able to do so and 0 if ran out of memory allocating
1092 space to do so. */
1093#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
1094 ((FAIL_STACK_FULL () \
1095 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
1096 ? 0 \
1097 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1098 1))
1099
1100/* Push a pointer value onto the failure stack.
1101 Assumes the variable `fail_stack'. Probably should only
1102 be called from within `PUSH_FAILURE_POINT'. */
1103#define PUSH_FAILURE_POINTER(item) \
1104 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1105
1106/* This pushes an integer-valued item onto the failure stack.
1107 Assumes the variable `fail_stack'. Probably should only
1108 be called from within `PUSH_FAILURE_POINT'. */
1109#define PUSH_FAILURE_INT(item) \
1110 fail_stack.stack[fail_stack.avail++].integer = (item)
1111
1112/* Push a fail_stack_elt_t value onto the failure stack.
1113 Assumes the variable `fail_stack'. Probably should only
1114 be called from within `PUSH_FAILURE_POINT'. */
1115#define PUSH_FAILURE_ELT(item) \
1116 fail_stack.stack[fail_stack.avail++] = (item)
1117
1118/* These three POP... operations complement the three PUSH... operations.
1119 All assume that `fail_stack' is nonempty. */
1120#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1121#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1122#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1123
1124/* Used to omit pushing failure point id's when we're not debugging. */
1125#ifdef DEBUG
1126#define DEBUG_PUSH PUSH_FAILURE_INT
1127#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1128#else
1129#define DEBUG_PUSH(item)
1130#define DEBUG_POP(item_addr)
1131#endif
1132
1133
1134/* Push the information about the state we will need
1135 if we ever fail back to it.
1136
1137 Requires variables fail_stack, regstart, regend, reg_info, and
1138 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
1139 declared.
1140
1141 Does `return FAILURE_CODE' if runs out of memory. */
1142
1143#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
1144 do { \
1145 char *destination; \
1146 /* Must be int, so when we don't save any registers, the arithmetic \
1147 of 0 + -1 isn't done as unsigned. */ \
1148 int this_reg; \
1149 \
1150 DEBUG_STATEMENT (failure_id++); \
1151 DEBUG_STATEMENT (nfailure_points_pushed++); \
1152 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1153 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
1154 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1155 \
1156 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
1157 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
1158 \
1159 /* Ensure we have enough space allocated for what we will push. */ \
1160 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
1161 { \
1162 if (!DOUBLE_FAIL_STACK (fail_stack)) \
1163 return failure_code; \
1164 \
1165 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
1166 (fail_stack).size); \
1167 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1168 } \
1169 \
1170 /* Push the info, starting with the registers. */ \
1171 DEBUG_PRINT1 ("\n"); \
1172 \
1173 if (1) \
1174 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1175 this_reg++) \
1176 { \
1177 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
1178 DEBUG_STATEMENT (num_regs_pushed++); \
1179 \
1180 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1181 PUSH_FAILURE_POINTER (regstart[this_reg]); \
1182 \
1183 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1184 PUSH_FAILURE_POINTER (regend[this_reg]); \
1185 \
1186 DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
1187 DEBUG_PRINT2 (" match_null=%d", \
1188 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
1189 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
1190 DEBUG_PRINT2 (" matched_something=%d", \
1191 MATCHED_SOMETHING (reg_info[this_reg])); \
1192 DEBUG_PRINT2 (" ever_matched=%d", \
1193 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
1194 DEBUG_PRINT1 ("\n"); \
1195 PUSH_FAILURE_ELT (reg_info[this_reg].word); \
1196 } \
1197 \
1198 DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
1199 PUSH_FAILURE_INT (lowest_active_reg); \
1200 \
1201 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
1202 PUSH_FAILURE_INT (highest_active_reg); \
1203 \
1204 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
1205 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
1206 PUSH_FAILURE_POINTER (pattern_place); \
1207 \
1208 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
1209 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
1210 size2); \
1211 DEBUG_PRINT1 ("'\n"); \
1212 PUSH_FAILURE_POINTER (string_place); \
1213 \
1214 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
1215 DEBUG_PUSH (failure_id); \
1216 } while (0)
1217
1218/* This is the number of items that are pushed and popped on the stack
1219 for each register. */
1220#define NUM_REG_ITEMS 3
1221
1222/* Individual items aside from the registers. */
1223#ifdef DEBUG
1224#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
1225#else
1226#define NUM_NONREG_ITEMS 4
1227#endif
1228
1229/* We push at most this many items on the stack. */
1230/* We used to use (num_regs - 1), which is the number of registers
1231 this regexp will save; but that was changed to 5
1232 to avoid stack overflow for a regexp with lots of parens. */
1233#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1234
1235/* We actually push this many items. */
1236#define NUM_FAILURE_ITEMS \
1237 (((0 \
1238 ? 0 : highest_active_reg - lowest_active_reg + 1) \
1239 * NUM_REG_ITEMS) \
1240 + NUM_NONREG_ITEMS)
1241
1242/* How many items can still be added to the stack without overflowing it. */
1243#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1244
1245
1246/* Pops what PUSH_FAIL_STACK pushes.
1247
1248 We restore into the parameters, all of which should be lvalues:
1249 STR -- the saved data position.
1250 PAT -- the saved pattern position.
1251 LOW_REG, HIGH_REG -- the highest and lowest active registers.
1252 REGSTART, REGEND -- arrays of string positions.
1253 REG_INFO -- array of information about each subexpression.
1254
1255 Also assumes the variables `fail_stack' and (if debugging), `bufp',
1256 `pend', `string1', `size1', `string2', and `size2'. */
1257
1258#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1259{ \
1260 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
1261 int this_reg; \
1262 const unsigned char *string_temp; \
1263 \
1264 assert (!FAIL_STACK_EMPTY ()); \
1265 \
1266 /* Remove failure points and point to how many regs pushed. */ \
1267 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
1268 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
1269 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
1270 \
1271 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
1272 \
1273 DEBUG_POP (&failure_id); \
1274 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
1275 \
1276 /* If the saved string location is NULL, it came from an \
1277 on_failure_keep_string_jump opcode, and we want to throw away the \
1278 saved NULL, thus retaining our current position in the string. */ \
1279 string_temp = POP_FAILURE_POINTER (); \
1280 if (string_temp != NULL) \
1281 str = (const char *) string_temp; \
1282 \
1283 DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
1284 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1285 DEBUG_PRINT1 ("'\n"); \
1286 \
1287 pat = (unsigned char *) POP_FAILURE_POINTER (); \
1288 DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
1289 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1290 \
1291 /* Restore register info. */ \
1292 high_reg = (unsigned) POP_FAILURE_INT (); \
1293 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
1294 \
1295 low_reg = (unsigned) POP_FAILURE_INT (); \
1296 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
1297 \
1298 if (1) \
1299 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
1300 { \
1301 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
1302 \
1303 reg_info[this_reg].word = POP_FAILURE_ELT (); \
1304 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
1305 \
1306 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
1307 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1308 \
1309 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
1310 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1311 } \
1312 else \
1313 { \
1314 for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1315 { \
1316 reg_info[this_reg].word.integer = 0; \
1317 regend[this_reg] = 0; \
1318 regstart[this_reg] = 0; \
1319 } \
1320 highest_active_reg = high_reg; \
1321 } \
1322 \
1323 set_regs_matched_done = 0; \
1324 DEBUG_STATEMENT (nfailure_points_popped++); \
1325} /* POP_FAILURE_POINT */
1326
1327
1328
1329/* Structure for per-register (a.k.a. per-group) information.
1330 Other register information, such as the
1331 starting and ending positions (which are addresses), and the list of
1332 inner groups (which is a bits list) are maintained in separate
1333 variables.
1334
1335 We are making a (strictly speaking) nonportable assumption here: that
1336 the compiler will pack our bit fields into something that fits into
1337 the type of `word', i.e., is something that fits into one item on the
1338 failure stack. */
1339
1340typedef union
1341{
1342 fail_stack_elt_t word;
1343 struct
1344 {
1345 /* This field is one if this group can match the empty string,
1346 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
1347#define MATCH_NULL_UNSET_VALUE 3
1348 unsigned match_null_string_p : 2;
1349 unsigned is_active : 1;
1350 unsigned matched_something : 1;
1351 unsigned ever_matched_something : 1;
1352 } bits;
1353} register_info_type;
1354
1355#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
1356#define IS_ACTIVE(R) ((R).bits.is_active)
1357#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
1358#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
1359
1360
1361/* Call this when have matched a real character; it sets `matched' flags
1362 for the subexpressions which we are currently inside. Also records
1363 that those subexprs have matched. */
1364#define SET_REGS_MATCHED() \
1365 do \
1366 { \
1367 if (!set_regs_matched_done) \
1368 { \
1369 unsigned r; \
1370 set_regs_matched_done = 1; \
1371 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
1372 { \
1373 MATCHED_SOMETHING (reg_info[r]) \
1374 = EVER_MATCHED_SOMETHING (reg_info[r]) \
1375 = 1; \
1376 } \
1377 } \
1378 } \
1379 while (0)
1380
1381/* Registers are set to a sentinel when they haven't yet matched. */
1382static char reg_unset_dummy;
1383#define REG_UNSET_VALUE (&reg_unset_dummy)
1384#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1385
1386/* Subroutine declarations and macros for regex_compile. */
1387
1388static void store_op1 (), store_op2 ();
1389static void insert_op1 (), insert_op2 ();
1390static boolean at_begline_loc_p (), at_endline_loc_p ();
1391static boolean group_in_compile_stack ();
1392static reg_errcode_t compile_range ();
1393
1394/* Fetch the next character in the uncompiled pattern---translating it
1395 if necessary. Also cast from a signed character in the constant
1396 string passed to us by the user to an unsigned char that we can use
1397 as an array index (in, e.g., `translate'). */
1398#ifndef PATFETCH
1399#define PATFETCH(c) \
1400 do {if (p == pend) return REG_EEND; \
1401 c = (unsigned char) *p++; \
1402 if (translate) c = (unsigned char) translate[c]; \
1403 } while (0)
1404#endif
1405
1406/* Fetch the next character in the uncompiled pattern, with no
1407 translation. */
1408#define PATFETCH_RAW(c) \
1409 do {if (p == pend) return REG_EEND; \
1410 c = (unsigned char) *p++; \
1411 } while (0)
1412
1413/* Go backwards one character in the pattern. */
1414#define PATUNFETCH p--
1415
1416
1417/* If `translate' is non-null, return translate[D], else just D. We
1418 cast the subscript to translate because some data is declared as
1419 `char *', to avoid warnings when a string constant is passed. But
1420 when we use a character as a subscript we must make it unsigned. */
1421#ifndef TRANSLATE
1422#define TRANSLATE(d) \
1423 (translate ? (char) translate[(unsigned char) (d)] : (d))
1424#endif
1425
1426
1427/* Macros for outputting the compiled pattern into `buffer'. */
1428
1429/* If the buffer isn't allocated when it comes in, use this. */
1430#define INIT_BUF_SIZE 32
1431
1432/* Make sure we have at least N more bytes of space in buffer. */
1433#define GET_BUFFER_SPACE(n) \
1434 while (b - bufp->buffer + (n) > bufp->allocated) \
1435 EXTEND_BUFFER ()
1436
1437/* Make sure we have one more byte of buffer space and then add C to it. */
1438#define BUF_PUSH(c) \
1439 do { \
1440 GET_BUFFER_SPACE (1); \
1441 *b++ = (unsigned char) (c); \
1442 } while (0)
1443
1444
1445/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
1446#define BUF_PUSH_2(c1, c2) \
1447 do { \
1448 GET_BUFFER_SPACE (2); \
1449 *b++ = (unsigned char) (c1); \
1450 *b++ = (unsigned char) (c2); \
1451 } while (0)
1452
1453
1454/* As with BUF_PUSH_2, except for three bytes. */
1455#define BUF_PUSH_3(c1, c2, c3) \
1456 do { \
1457 GET_BUFFER_SPACE (3); \
1458 *b++ = (unsigned char) (c1); \
1459 *b++ = (unsigned char) (c2); \
1460 *b++ = (unsigned char) (c3); \
1461 } while (0)
1462
1463
1464/* Store a jump with opcode OP at LOC to location TO. We store a
1465 relative address offset by the three bytes the jump itself occupies. */
1466#define STORE_JUMP(op, loc, to) \
1467 store_op1 (op, loc, (to) - (loc) - 3)
1468
1469/* Likewise, for a two-argument jump. */
1470#define STORE_JUMP2(op, loc, to, arg) \
1471 store_op2 (op, loc, (to) - (loc) - 3, arg)
1472
1473/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
1474#define INSERT_JUMP(op, loc, to) \
1475 insert_op1 (op, loc, (to) - (loc) - 3, b)
1476
1477/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
1478#define INSERT_JUMP2(op, loc, to, arg) \
1479 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1480
1481
1482/* This is not an arbitrary limit: the arguments which represent offsets
1483 into the pattern are two bytes long. So if 2^16 bytes turns out to
1484 be too small, many things would have to change. */
1485#define MAX_BUF_SIZE (1L << 16)
1486
1487
1488/* Extend the buffer by twice its current size via realloc and
1489 reset the pointers that pointed into the old block to point to the
1490 correct places in the new one. If extending the buffer results in it
1491 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
1492#define EXTEND_BUFFER() \
1493 do { \
1494 unsigned char *old_buffer = bufp->buffer; \
1495 if (bufp->allocated == MAX_BUF_SIZE) \
1496 return REG_ESIZE; \
1497 bufp->allocated <<= 1; \
1498 if (bufp->allocated > MAX_BUF_SIZE) \
1499 bufp->allocated = MAX_BUF_SIZE; \
1500 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
1501 if (bufp->buffer == NULL) \
1502 return REG_ESPACE; \
1503 /* If the buffer moved, move all the pointers into it. */ \
1504 if (old_buffer != bufp->buffer) \
1505 { \
1506 b = (b - old_buffer) + bufp->buffer; \
1507 begalt = (begalt - old_buffer) + bufp->buffer; \
1508 if (fixup_alt_jump) \
1509 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1510 if (laststart) \
1511 laststart = (laststart - old_buffer) + bufp->buffer; \
1512 if (pending_exact) \
1513 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
1514 } \
1515 } while (0)
1516
1517
1518/* Since we have one byte reserved for the register number argument to
1519 {start,stop}_memory, the maximum number of groups we can report
1520 things about is what fits in that byte. */
1521#define MAX_REGNUM 255
1522
1523/* But patterns can have more than `MAX_REGNUM' registers. We just
1524 ignore the excess. */
1525typedef unsigned regnum_t;
1526
1527
1528/* Macros for the compile stack. */
1529
1530/* Since offsets can go either forwards or backwards, this type needs to
1531 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
1532typedef int pattern_offset_t;
1533
1534typedef struct
1535{
1536 pattern_offset_t begalt_offset;
1537 pattern_offset_t fixup_alt_jump;
1538 pattern_offset_t inner_group_offset;
1539 pattern_offset_t laststart_offset;
1540 regnum_t regnum;
1541} compile_stack_elt_t;
1542
1543
1544typedef struct
1545{
1546 compile_stack_elt_t *stack;
1547 unsigned size;
1548 unsigned avail; /* Offset of next open position. */
1549} compile_stack_type;
1550
1551
1552#define INIT_COMPILE_STACK_SIZE 32
1553
1554#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1555#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1556
1557/* The next available element. */
1558#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1559
1560
1561/* Set the bit for character C in a list. */
1562#define SET_LIST_BIT(c) \
1563 (b[((unsigned char) (c)) / BYTEWIDTH] \
1564 |= 1 << (((unsigned char) c) % BYTEWIDTH))
1565
1566
1567/* Get the next unsigned number in the uncompiled pattern. */
1568#define GET_UNSIGNED_NUMBER(num) \
1569 { if (p != pend) \
1570 { \
1571 PATFETCH (c); \
1572 while (ISDIGIT (c)) \
1573 { \
1574 if (num < 0) \
1575 num = 0; \
1576 num = num * 10 + c - '0'; \
1577 if (p == pend) \
1578 break; \
1579 PATFETCH (c); \
1580 } \
1581 } \
1582 }
1583
1584#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
1585
1586#define IS_CHAR_CLASS(string) \
1587 (STREQ (string, "alpha") || STREQ (string, "upper") \
1588 || STREQ (string, "lower") || STREQ (string, "digit") \
1589 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
1590 || STREQ (string, "space") || STREQ (string, "print") \
1591 || STREQ (string, "punct") || STREQ (string, "graph") \
1592 || STREQ (string, "cntrl") || STREQ (string, "blank"))
1593
1594#ifndef MATCH_MAY_ALLOCATE
1595
1596/* If we cannot allocate large objects within re_match_2_internal,
1597 we make the fail stack and register vectors global.
1598 The fail stack, we grow to the maximum size when a regexp
1599 is compiled.
1600 The register vectors, we adjust in size each time we
1601 compile a regexp, according to the number of registers it needs. */
1602
1603static fail_stack_type fail_stack;
1604
1605/* Size with which the following vectors are currently allocated.
1606 That is so we can make them bigger as needed,
1607 but never make them smaller. */
1608static int regs_allocated_size;
1609
1610static const char ** regstart, ** regend;
1611static const char ** old_regstart, ** old_regend;
1612static const char **best_regstart, **best_regend;
1613static register_info_type *reg_info;
1614static const char **reg_dummy;
1615static register_info_type *reg_info_dummy;
1616
1617/* Make the register vectors big enough for NUM_REGS registers,
1618 but don't make them smaller. */
1619
1620static
1621regex_grow_registers (num_regs)
1622 int num_regs;
1623{
1624 if (num_regs > regs_allocated_size)
1625 {
1626 RETALLOC_IF (regstart, num_regs, const char *);
1627 RETALLOC_IF (regend, num_regs, const char *);
1628 RETALLOC_IF (old_regstart, num_regs, const char *);
1629 RETALLOC_IF (old_regend, num_regs, const char *);
1630 RETALLOC_IF (best_regstart, num_regs, const char *);
1631 RETALLOC_IF (best_regend, num_regs, const char *);
1632 RETALLOC_IF (reg_info, num_regs, register_info_type);
1633 RETALLOC_IF (reg_dummy, num_regs, const char *);
1634 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1635
1636 regs_allocated_size = num_regs;
1637 }
1638}
1639
1640#endif /* not MATCH_MAY_ALLOCATE */
1641
1642/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1643 Returns one of error codes defined in `regex.h', or zero for success.
1644
1645 Assumes the `allocated' (and perhaps `buffer') and `translate'
1646 fields are set in BUFP on entry.
1647
1648 If it succeeds, results are put in BUFP (if it returns an error, the
1649 contents of BUFP are undefined):
1650 `buffer' is the compiled pattern;
1651 `syntax' is set to SYNTAX;
1652 `used' is set to the length of the compiled pattern;
1653 `fastmap_accurate' is zero;
1654 `re_nsub' is the number of subexpressions in PATTERN;
1655 `not_bol' and `not_eol' are zero;
1656
1657 The `fastmap' and `newline_anchor' fields are neither
1658 examined nor set. */
1659
1660/* Return, freeing storage we allocated. */
1661#define FREE_STACK_RETURN(value) \
1662 return (free (compile_stack.stack), value)
1663
1664static reg_errcode_t
1665regex_compile (pattern, size, syntax, bufp)
1666 const char *pattern;
1667 int size;
1668 reg_syntax_t syntax;
1669 struct re_pattern_buffer *bufp;
1670{
1671 /* We fetch characters from PATTERN here. Even though PATTERN is
1672 `char *' (i.e., signed), we declare these variables as unsigned, so
1673 they can be reliably used as array indices. */
1674 register unsigned char c, c1;
1675
1676 /* A random temporary spot in PATTERN. */
1677 const char *p1;
1678
1679 /* Points to the end of the buffer, where we should append. */
1680 register unsigned char *b;
1681
1682 /* Keeps track of unclosed groups. */
1683 compile_stack_type compile_stack;
1684
1685 /* Points to the current (ending) position in the pattern. */
1686 const char *p = pattern;
1687 const char *pend = pattern + size;
1688
1689 /* How to translate the characters in the pattern. */
1690 RE_TRANSLATE_TYPE translate = bufp->translate;
1691
1692 /* Address of the count-byte of the most recently inserted `exactn'
1693 command. This makes it possible to tell if a new exact-match
1694 character can be added to that command or if the character requires
1695 a new `exactn' command. */
1696 unsigned char *pending_exact = 0;
1697
1698 /* Address of start of the most recently finished expression.
1699 This tells, e.g., postfix * where to find the start of its
1700 operand. Reset at the beginning of groups and alternatives. */
1701 unsigned char *laststart = 0;
1702
1703 /* Address of beginning of regexp, or inside of last group. */
1704 unsigned char *begalt;
1705
1706 /* Place in the uncompiled pattern (i.e., the {) to
1707 which to go back if the interval is invalid. */
1708 const char *beg_interval;
1709
1710 /* Address of the place where a forward jump should go to the end of
1711 the containing expression. Each alternative of an `or' -- except the
1712 last -- ends with a forward jump of this sort. */
1713 unsigned char *fixup_alt_jump = 0;
1714
1715 /* Counts open-groups as they are encountered. Remembered for the
1716 matching close-group on the compile stack, so the same register
1717 number is put in the stop_memory as the start_memory. */
1718 regnum_t regnum = 0;
1719
1720#ifdef DEBUG
1721 DEBUG_PRINT1 ("\nCompiling pattern: ");
1722 if (debug)
1723 {
1724 unsigned debug_count;
1725
1726 for (debug_count = 0; debug_count < size; debug_count++)
1727 putchar (pattern[debug_count]);
1728 putchar ('\n');
1729 }
1730#endif /* DEBUG */
1731
1732 /* Initialize the compile stack. */
1733 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1734 if (compile_stack.stack == NULL)
1735 return REG_ESPACE;
1736
1737 compile_stack.size = INIT_COMPILE_STACK_SIZE;
1738 compile_stack.avail = 0;
1739
1740 /* Initialize the pattern buffer. */
1741 bufp->syntax = syntax;
1742 bufp->fastmap_accurate = 0;
1743 bufp->not_bol = bufp->not_eol = 0;
1744
1745 /* Set `used' to zero, so that if we return an error, the pattern
1746 printer (for debugging) will think there's no pattern. We reset it
1747 at the end. */
1748 bufp->used = 0;
1749
1750 /* Always count groups, whether or not bufp->no_sub is set. */
1751 bufp->re_nsub = 0;
1752
1753#if !defined (emacs) && !defined (SYNTAX_TABLE)
1754 /* Initialize the syntax table. */
1755 init_syntax_once ();
1756#endif
1757
1758 if (bufp->allocated == 0)
1759 {
1760 if (bufp->buffer)
1761 { /* If zero allocated, but buffer is non-null, try to realloc
1762 enough space. This loses if buffer's address is bogus, but
1763 that is the user's responsibility. */
1764 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1765 }
1766 else
1767 { /* Caller did not allocate a buffer. Do it for them. */
1768 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1769 }
1770 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1771
1772 bufp->allocated = INIT_BUF_SIZE;
1773 }
1774
1775 begalt = b = bufp->buffer;
1776
1777 /* Loop through the uncompiled pattern until we're at the end. */
1778 while (p != pend)
1779 {
1780 PATFETCH (c);
1781
1782 switch (c)
1783 {
1784 case '^':
1785 {
1786 if ( /* If at start of pattern, it's an operator. */
1787 p == pattern + 1
1788 /* If context independent, it's an operator. */
1789 || syntax & RE_CONTEXT_INDEP_ANCHORS
1790 /* Otherwise, depends on what's come before. */
1791 || at_begline_loc_p (pattern, p, syntax))
1792 BUF_PUSH (begline);
1793 else
1794 goto normal_char;
1795 }
1796 break;
1797
1798
1799 case '$':
1800 {
1801 if ( /* If at end of pattern, it's an operator. */
1802 p == pend
1803 /* If context independent, it's an operator. */
1804 || syntax & RE_CONTEXT_INDEP_ANCHORS
1805 /* Otherwise, depends on what's next. */
1806 || at_endline_loc_p (p, pend, syntax))
1807 BUF_PUSH (endline);
1808 else
1809 goto normal_char;
1810 }
1811 break;
1812
1813
1814 case '+':
1815 case '?':
1816 if ((syntax & RE_BK_PLUS_QM)
1817 || (syntax & RE_LIMITED_OPS))
1818 goto normal_char;
1819 handle_plus:
1820 case '*':
1821 /* If there is no previous pattern... */
1822 if (!laststart)
1823 {
1824 if (syntax & RE_CONTEXT_INVALID_OPS)
1825 FREE_STACK_RETURN (REG_BADRPT);
1826 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1827 goto normal_char;
1828 }
1829
1830 {
1831 /* Are we optimizing this jump? */
1832 boolean keep_string_p = false;
1833
1834 /* 1 means zero (many) matches is allowed. */
1835 char zero_times_ok = 0, many_times_ok = 0;
1836
1837 /* If there is a sequence of repetition chars, collapse it
1838 down to just one (the right one). We can't combine
1839 interval operators with these because of, e.g., `a{2}*',
1840 which should only match an even number of `a's. */
1841
1842 for (;;)
1843 {
1844 zero_times_ok |= c != '+';
1845 many_times_ok |= c != '?';
1846
1847 if (p == pend)
1848 break;
1849
1850 PATFETCH (c);
1851
1852 if (c == '*'
1853 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1854 ;
1855
1856 else if (syntax & RE_BK_PLUS_QM && c == '\\')
1857 {
1858 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1859
1860 PATFETCH (c1);
1861 if (!(c1 == '+' || c1 == '?'))
1862 {
1863 PATUNFETCH;
1864 PATUNFETCH;
1865 break;
1866 }
1867
1868 c = c1;
1869 }
1870 else
1871 {
1872 PATUNFETCH;
1873 break;
1874 }
1875
1876 /* If we get here, we found another repeat character. */
1877 }
1878
1879 /* Star, etc. applied to an empty pattern is equivalent
1880 to an empty pattern. */
1881 if (!laststart)
1882 break;
1883
1884 /* Now we know whether or not zero matches is allowed
1885 and also whether or not two or more matches is allowed. */
1886 if (many_times_ok)
1887 { /* More than one repetition is allowed, so put in at the
1888 end a backward relative jump from `b' to before the next
1889 jump we're going to put in below (which jumps from
1890 laststart to after this jump).
1891
1892 But if we are at the `*' in the exact sequence `.*\n',
1893 insert an unconditional jump backwards to the .,
1894 instead of the beginning of the loop. This way we only
1895 push a failure point once, instead of every time
1896 through the loop. */
1897 assert (p - 1 > pattern);
1898
1899 /* Allocate the space for the jump. */
1900 GET_BUFFER_SPACE (3);
1901
1902 /* We know we are not at the first character of the pattern,
1903 because laststart was nonzero. And we've already
1904 incremented `p', by the way, to be the character after
1905 the `*'. Do we have to do something analogous here
1906 for null bytes, because of RE_DOT_NOT_NULL? */
1907 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1908 && zero_times_ok
1909 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1910 && !(syntax & RE_DOT_NEWLINE))
1911 { /* We have .*\n. */
1912 STORE_JUMP (jump, b, laststart);
1913 keep_string_p = true;
1914 }
1915 else
1916 /* Anything else. */
1917 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1918
1919 /* We've added more stuff to the buffer. */
1920 b += 3;
1921 }
1922
1923 /* On failure, jump from laststart to b + 3, which will be the
1924 end of the buffer after this jump is inserted. */
1925 GET_BUFFER_SPACE (3);
1926 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1927 : on_failure_jump,
1928 laststart, b + 3);
1929 pending_exact = 0;
1930 b += 3;
1931
1932 if (!zero_times_ok)
1933 {
1934 /* At least one repetition is required, so insert a
1935 `dummy_failure_jump' before the initial
1936 `on_failure_jump' instruction of the loop. This
1937 effects a skip over that instruction the first time
1938 we hit that loop. */
1939 GET_BUFFER_SPACE (3);
1940 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1941 b += 3;
1942 }
1943 }
1944 break;
1945
1946
1947 case '.':
1948 laststart = b;
1949 BUF_PUSH (anychar);
1950 break;
1951
1952
1953 case '[':
1954 {
1955 boolean had_char_class = false;
1956
1957 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1958
1959 /* Ensure that we have enough space to push a charset: the
1960 opcode, the length count, and the bitset; 34 bytes in all. */
1961 GET_BUFFER_SPACE (34);
1962
1963 laststart = b;
1964
1965 /* We test `*p == '^' twice, instead of using an if
1966 statement, so we only need one BUF_PUSH. */
1967 BUF_PUSH (*p == '^' ? charset_not : charset);
1968 if (*p == '^')
1969 p++;
1970
1971 /* Remember the first position in the bracket expression. */
1972 p1 = p;
1973
1974 /* Push the number of bytes in the bitmap. */
1975 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1976
1977 /* Clear the whole map. */
1978 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1979
1980 /* charset_not matches newline according to a syntax bit. */
1981 if ((re_opcode_t) b[-2] == charset_not
1982 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1983 SET_LIST_BIT ('\n');
1984
1985 /* Read in characters and ranges, setting map bits. */
1986 for (;;)
1987 {
1988 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1989
1990 PATFETCH (c);
1991
1992 /* \ might escape characters inside [...] and [^...]. */
1993 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1994 {
1995 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1996
1997 PATFETCH (c1);
1998 SET_LIST_BIT (c1);
1999 continue;
2000 }
2001
2002 /* Could be the end of the bracket expression. If it's
2003 not (i.e., when the bracket expression is `[]' so
2004 far), the ']' character bit gets set way below. */
2005 if (c == ']' && p != p1 + 1)
2006 break;
2007
2008 /* Look ahead to see if it's a range when the last thing
2009 was a character class. */
2010 if (had_char_class && c == '-' && *p != ']')
2011 FREE_STACK_RETURN (REG_ERANGE);
2012
2013 /* Look ahead to see if it's a range when the last thing
2014 was a character: if this is a hyphen not at the
2015 beginning or the end of a list, then it's the range
2016 operator. */
2017 if (c == '-'
2018 && !(p - 2 >= pattern && p[-2] == '[')
2019 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2020 && *p != ']')
2021 {
2022 reg_errcode_t ret
2023 = compile_range (&p, pend, translate, syntax, b);
2024 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2025 }
2026
2027 else if (p[0] == '-' && p[1] != ']')
2028 { /* This handles ranges made up of characters only. */
2029 reg_errcode_t ret;
2030
2031 /* Move past the `-'. */
2032 PATFETCH (c1);
2033
2034 ret = compile_range (&p, pend, translate, syntax, b);
2035 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2036 }
2037
2038 /* See if we're at the beginning of a possible character
2039 class. */
2040
2041 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2042 { /* Leave room for the null. */
2043 char str[CHAR_CLASS_MAX_LENGTH + 1];
2044
2045 PATFETCH (c);
2046 c1 = 0;
2047
2048 /* If pattern is `[[:'. */
2049 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2050
2051 for (;;)
2052 {
2053 PATFETCH (c);
2054 if (c == ':' || c == ']' || p == pend
2055 || c1 == CHAR_CLASS_MAX_LENGTH)
2056 break;
2057 str[c1++] = c;
2058 }
2059 str[c1] = '\0';
2060
2061 /* If isn't a word bracketed by `[:' and:`]':
2062 undo the ending character, the letters, and leave
2063 the leading `:' and `[' (but set bits for them). */
2064 if (c == ':' && *p == ']')
2065 {
2066 int ch;
2067 boolean is_alnum = STREQ (str, "alnum");
2068 boolean is_alpha = STREQ (str, "alpha");
2069 boolean is_blank = STREQ (str, "blank");
2070 boolean is_cntrl = STREQ (str, "cntrl");
2071 boolean is_digit = STREQ (str, "digit");
2072 boolean is_graph = STREQ (str, "graph");
2073 boolean is_lower = STREQ (str, "lower");
2074 boolean is_print = STREQ (str, "print");
2075 boolean is_punct = STREQ (str, "punct");
2076 boolean is_space = STREQ (str, "space");
2077 boolean is_upper = STREQ (str, "upper");
2078 boolean is_xdigit = STREQ (str, "xdigit");
2079
2080 if (!IS_CHAR_CLASS (str))
2081 FREE_STACK_RETURN (REG_ECTYPE);
2082
2083 /* Throw away the ] at the end of the character
2084 class. */
2085 PATFETCH (c);
2086
2087 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2088
2089 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2090 {
2091 int translated = TRANSLATE (ch);
2092 /* This was split into 3 if's to
2093 avoid an arbitrary limit in some compiler. */
2094 if ( (is_alnum && ISALNUM (ch))
2095 || (is_alpha && ISALPHA (ch))
2096 || (is_blank && ISBLANK (ch))
2097 || (is_cntrl && ISCNTRL (ch)))
2098 SET_LIST_BIT (translated);
2099 if ( (is_digit && ISDIGIT (ch))
2100 || (is_graph && ISGRAPH (ch))
2101 || (is_lower && ISLOWER (ch))
2102 || (is_print && ISPRINT (ch)))
2103 SET_LIST_BIT (translated);
2104 if ( (is_punct && ISPUNCT (ch))
2105 || (is_space && ISSPACE (ch))
2106 || (is_upper && ISUPPER (ch))
2107 || (is_xdigit && ISXDIGIT (ch)))
2108 SET_LIST_BIT (translated);
2109 }
2110 had_char_class = true;
2111 }
2112 else
2113 {
2114 c1++;
2115 while (c1--)
2116 PATUNFETCH;
2117 SET_LIST_BIT ('[');
2118 SET_LIST_BIT (':');
2119 had_char_class = false;
2120 }
2121 }
2122 else
2123 {
2124 had_char_class = false;
2125 SET_LIST_BIT (c);
2126 }
2127 }
2128
2129 /* Discard any (non)matching list bytes that are all 0 at the
2130 end of the map. Decrease the map-length byte too. */
2131 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2132 b[-1]--;
2133 b += b[-1];
2134 }
2135 break;
2136
2137
2138 case '(':
2139 if (syntax & RE_NO_BK_PARENS)
2140 goto handle_open;
2141 else
2142 goto normal_char;
2143
2144
2145 case ')':
2146 if (syntax & RE_NO_BK_PARENS)
2147 goto handle_close;
2148 else
2149 goto normal_char;
2150
2151
2152 case '\n':
2153 if (syntax & RE_NEWLINE_ALT)
2154 goto handle_alt;
2155 else
2156 goto normal_char;
2157
2158
2159 case '|':
2160 if (syntax & RE_NO_BK_VBAR)
2161 goto handle_alt;
2162 else
2163 goto normal_char;
2164
2165
2166 case '{':
2167 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2168 goto handle_interval;
2169 else
2170 goto normal_char;
2171
2172
2173 case '\\':
2174 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2175
2176 /* Do not translate the character after the \, so that we can
2177 distinguish, e.g., \B from \b, even if we normally would
2178 translate, e.g., B to b. */
2179 PATFETCH_RAW (c);
2180
2181 switch (c)
2182 {
2183 case '(':
2184 if (syntax & RE_NO_BK_PARENS)
2185 goto normal_backslash;
2186
2187 handle_open:
2188 bufp->re_nsub++;
2189 regnum++;
2190
2191 if (COMPILE_STACK_FULL)
2192 {
2193 RETALLOC (compile_stack.stack, compile_stack.size << 1,
2194 compile_stack_elt_t);
2195 if (compile_stack.stack == NULL) return REG_ESPACE;
2196
2197 compile_stack.size <<= 1;
2198 }
2199
2200 /* These are the values to restore when we hit end of this
2201 group. They are all relative offsets, so that if the
2202 whole pattern moves because of realloc, they will still
2203 be valid. */
2204 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2205 COMPILE_STACK_TOP.fixup_alt_jump
2206 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2207 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2208 COMPILE_STACK_TOP.regnum = regnum;
2209
2210 /* We will eventually replace the 0 with the number of
2211 groups inner to this one. But do not push a
2212 start_memory for groups beyond the last one we can
2213 represent in the compiled pattern. */
2214 if (regnum <= MAX_REGNUM)
2215 {
2216 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2217 BUF_PUSH_3 (start_memory, regnum, 0);
2218 }
2219
2220 compile_stack.avail++;
2221
2222 fixup_alt_jump = 0;
2223 laststart = 0;
2224 begalt = b;
2225 /* If we've reached MAX_REGNUM groups, then this open
2226 won't actually generate any code, so we'll have to
2227 clear pending_exact explicitly. */
2228 pending_exact = 0;
2229 break;
2230
2231
2232 case ')':
2233 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2234
2235 if (COMPILE_STACK_EMPTY)
2236 {
2237 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2238 goto normal_backslash;
2239 else
2240 FREE_STACK_RETURN (REG_ERPAREN);
2241 }
2242
2243 handle_close:
2244 if (fixup_alt_jump)
2245 { /* Push a dummy failure point at the end of the
2246 alternative for a possible future
2247 `pop_failure_jump' to pop. See comments at
2248 `push_dummy_failure' in `re_match_2'. */
2249 BUF_PUSH (push_dummy_failure);
2250
2251 /* We allocated space for this jump when we assigned
2252 to `fixup_alt_jump', in the `handle_alt' case below. */
2253 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2254 }
2255
2256 /* See similar code for backslashed left paren above. */
2257 if (COMPILE_STACK_EMPTY)
2258 {
2259 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2260 goto normal_char;
2261 else
2262 FREE_STACK_RETURN (REG_ERPAREN);
2263 }
2264
2265 /* Since we just checked for an empty stack above, this
2266 ``can't happen''. */
2267 assert (compile_stack.avail != 0);
2268 {
2269 /* We don't just want to restore into `regnum', because
2270 later groups should continue to be numbered higher,
2271 as in `(ab)c(de)' -- the second group is #2. */
2272 regnum_t this_group_regnum;
2273
2274 compile_stack.avail--;
2275 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2276 fixup_alt_jump
2277 = COMPILE_STACK_TOP.fixup_alt_jump
2278 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2279 : 0;
2280 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2281 this_group_regnum = COMPILE_STACK_TOP.regnum;
2282 /* If we've reached MAX_REGNUM groups, then this open
2283 won't actually generate any code, so we'll have to
2284 clear pending_exact explicitly. */
2285 pending_exact = 0;
2286
2287 /* We're at the end of the group, so now we know how many
2288 groups were inside this one. */
2289 if (this_group_regnum <= MAX_REGNUM)
2290 {
2291 unsigned char *inner_group_loc
2292 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2293
2294 *inner_group_loc = regnum - this_group_regnum;
2295 BUF_PUSH_3 (stop_memory, this_group_regnum,
2296 regnum - this_group_regnum);
2297 }
2298 }
2299 break;
2300
2301
2302 case '|': /* `\|'. */
2303 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2304 goto normal_backslash;
2305 handle_alt:
2306 if (syntax & RE_LIMITED_OPS)
2307 goto normal_char;
2308
2309 /* Insert before the previous alternative a jump which
2310 jumps to this alternative if the former fails. */
2311 GET_BUFFER_SPACE (3);
2312 INSERT_JUMP (on_failure_jump, begalt, b + 6);
2313 pending_exact = 0;
2314 b += 3;
2315
2316 /* The alternative before this one has a jump after it
2317 which gets executed if it gets matched. Adjust that
2318 jump so it will jump to this alternative's analogous
2319 jump (put in below, which in turn will jump to the next
2320 (if any) alternative's such jump, etc.). The last such
2321 jump jumps to the correct final destination. A picture:
2322 _____ _____
2323 | | | |
2324 | v | v
2325 a | b | c
2326
2327 If we are at `b', then fixup_alt_jump right now points to a
2328 three-byte space after `a'. We'll put in the jump, set
2329 fixup_alt_jump to right after `b', and leave behind three
2330 bytes which we'll fill in when we get to after `c'. */
2331
2332 if (fixup_alt_jump)
2333 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2334
2335 /* Mark and leave space for a jump after this alternative,
2336 to be filled in later either by next alternative or
2337 when know we're at the end of a series of alternatives. */
2338 fixup_alt_jump = b;
2339 GET_BUFFER_SPACE (3);
2340 b += 3;
2341
2342 laststart = 0;
2343 begalt = b;
2344 break;
2345
2346
2347 case '{':
2348 /* If \{ is a literal. */
2349 if (!(syntax & RE_INTERVALS)
2350 /* If we're at `\{' and it's not the open-interval
2351 operator. */
2352 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2353 || (p - 2 == pattern && p == pend))
2354 goto normal_backslash;
2355
2356 handle_interval:
2357 {
2358 /* If got here, then the syntax allows intervals. */
2359
2360 /* At least (most) this many matches must be made. */
2361 int lower_bound = -1, upper_bound = -1;
2362
2363 beg_interval = p - 1;
2364
2365 if (p == pend)
2366 {
2367 if (syntax & RE_NO_BK_BRACES)
2368 goto unfetch_interval;
2369 else
2370 FREE_STACK_RETURN (REG_EBRACE);
2371 }
2372
2373 GET_UNSIGNED_NUMBER (lower_bound);
2374
2375 if (c == ',')
2376 {
2377 GET_UNSIGNED_NUMBER (upper_bound);
2378 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2379 }
2380 else
2381 /* Interval such as `{1}' => match exactly once. */
2382 upper_bound = lower_bound;
2383
2384 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2385 || lower_bound > upper_bound)
2386 {
2387 if (syntax & RE_NO_BK_BRACES)
2388 goto unfetch_interval;
2389 else
2390 FREE_STACK_RETURN (REG_BADBR);
2391 }
2392
2393 if (!(syntax & RE_NO_BK_BRACES))
2394 {
2395 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2396
2397 PATFETCH (c);
2398 }
2399
2400 if (c != '}')
2401 {
2402 if (syntax & RE_NO_BK_BRACES)
2403 goto unfetch_interval;
2404 else
2405 FREE_STACK_RETURN (REG_BADBR);
2406 }
2407
2408 /* We just parsed a valid interval. */
2409
2410 /* If it's invalid to have no preceding re. */
2411 if (!laststart)
2412 {
2413 if (syntax & RE_CONTEXT_INVALID_OPS)
2414 FREE_STACK_RETURN (REG_BADRPT);
2415 else if (syntax & RE_CONTEXT_INDEP_OPS)
2416 laststart = b;
2417 else
2418 goto unfetch_interval;
2419 }
2420
2421 /* If the upper bound is zero, don't want to succeed at
2422 all; jump from `laststart' to `b + 3', which will be
2423 the end of the buffer after we insert the jump. */
2424 if (upper_bound == 0)
2425 {
2426 GET_BUFFER_SPACE (3);
2427 INSERT_JUMP (jump, laststart, b + 3);
2428 b += 3;
2429 }
2430
2431 /* Otherwise, we have a nontrivial interval. When
2432 we're all done, the pattern will look like:
2433 set_number_at <jump count> <upper bound>
2434 set_number_at <succeed_n count> <lower bound>
2435 succeed_n <after jump addr> <succeed_n count>
2436 <body of loop>
2437 jump_n <succeed_n addr> <jump count>
2438 (The upper bound and `jump_n' are omitted if
2439 `upper_bound' is 1, though.) */
2440 else
2441 { /* If the upper bound is > 1, we need to insert
2442 more at the end of the loop. */
2443 unsigned nbytes = 10 + (upper_bound > 1) * 10;
2444
2445 GET_BUFFER_SPACE (nbytes);
2446
2447 /* Initialize lower bound of the `succeed_n', even
2448 though it will be set during matching by its
2449 attendant `set_number_at' (inserted next),
2450 because `re_compile_fastmap' needs to know.
2451 Jump to the `jump_n' we might insert below. */
2452 INSERT_JUMP2 (succeed_n, laststart,
2453 b + 5 + (upper_bound > 1) * 5,
2454 lower_bound);
2455 b += 5;
2456
2457 /* Code to initialize the lower bound. Insert
2458 before the `succeed_n'. The `5' is the last two
2459 bytes of this `set_number_at', plus 3 bytes of
2460 the following `succeed_n'. */
2461 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2462 b += 5;
2463
2464 if (upper_bound > 1)
2465 { /* More than one repetition is allowed, so
2466 append a backward jump to the `succeed_n'
2467 that starts this interval.
2468
2469 When we've reached this during matching,
2470 we'll have matched the interval once, so
2471 jump back only `upper_bound - 1' times. */
2472 STORE_JUMP2 (jump_n, b, laststart + 5,
2473 upper_bound - 1);
2474 b += 5;
2475
2476 /* The location we want to set is the second
2477 parameter of the `jump_n'; that is `b-2' as
2478 an absolute address. `laststart' will be
2479 the `set_number_at' we're about to insert;
2480 `laststart+3' the number to set, the source
2481 for the relative address. But we are
2482 inserting into the middle of the pattern --
2483 so everything is getting moved up by 5.
2484 Conclusion: (b - 2) - (laststart + 3) + 5,
2485 i.e., b - laststart.
2486
2487 We insert this at the beginning of the loop
2488 so that if we fail during matching, we'll
2489 reinitialize the bounds. */
2490 insert_op2 (set_number_at, laststart, b - laststart,
2491 upper_bound - 1, b);
2492 b += 5;
2493 }
2494 }
2495 pending_exact = 0;
2496 beg_interval = NULL;
2497 }
2498 break;
2499
2500 unfetch_interval:
2501 /* If an invalid interval, match the characters as literals. */
2502 assert (beg_interval);
2503 p = beg_interval;
2504 beg_interval = NULL;
2505
2506 /* normal_char and normal_backslash need `c'. */
2507 PATFETCH (c);
2508
2509 if (!(syntax & RE_NO_BK_BRACES))
2510 {
2511 if (p > pattern && p[-1] == '\\')
2512 goto normal_backslash;
2513 }
2514 goto normal_char;
2515
2516#ifdef emacs
2517 /* There is no way to specify the before_dot and after_dot
2518 operators. rms says this is ok. --karl */
2519 case '=':
2520 BUF_PUSH (at_dot);
2521 break;
2522
2523 case 's':
2524 laststart = b;
2525 PATFETCH (c);
2526 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2527 break;
2528
2529 case 'S':
2530 laststart = b;
2531 PATFETCH (c);
2532 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2533 break;
2534#endif /* emacs */
2535
2536
2537 case 'w':
2538 laststart = b;
2539 BUF_PUSH (wordchar);
2540 break;
2541
2542
2543 case 'W':
2544 laststart = b;
2545 BUF_PUSH (notwordchar);
2546 break;
2547
2548
2549 case '<':
2550 BUF_PUSH (wordbeg);
2551 break;
2552
2553 case '>':
2554 BUF_PUSH (wordend);
2555 break;
2556
2557 case 'b':
2558 BUF_PUSH (wordbound);
2559 break;
2560
2561 case 'B':
2562 BUF_PUSH (notwordbound);
2563 break;
2564
2565 case '`':
2566 BUF_PUSH (begbuf);
2567 break;
2568
2569 case '\'':
2570 BUF_PUSH (endbuf);
2571 break;
2572
2573 case '1': case '2': case '3': case '4': case '5':
2574 case '6': case '7': case '8': case '9':
2575 if (syntax & RE_NO_BK_REFS)
2576 goto normal_char;
2577
2578 c1 = c - '0';
2579
2580 if (c1 > regnum)
2581 FREE_STACK_RETURN (REG_ESUBREG);
2582
2583 /* Can't back reference to a subexpression if inside of it. */
2584 if (group_in_compile_stack (compile_stack, c1))
2585 goto normal_char;
2586
2587 laststart = b;
2588 BUF_PUSH_2 (duplicate, c1);
2589 break;
2590
2591
2592 case '+':
2593 case '?':
2594 if (syntax & RE_BK_PLUS_QM)
2595 goto handle_plus;
2596 else
2597 goto normal_backslash;
2598
2599 default:
2600 normal_backslash:
2601 /* You might think it would be useful for \ to mean
2602 not to translate; but if we don't translate it
2603 it will never match anything. */
2604 c = TRANSLATE (c);
2605 goto normal_char;
2606 }
2607 break;
2608
2609
2610 default:
2611 /* Expects the character in `c'. */
2612 normal_char:
2613 /* If no exactn currently being built. */
2614 if (!pending_exact
2615
2616 /* If last exactn not at current position. */
2617 || pending_exact + *pending_exact + 1 != b
2618
2619 /* We have only one byte following the exactn for the count. */
2620 || *pending_exact == (1 << BYTEWIDTH) - 1
2621
2622 /* If followed by a repetition operator. */
2623 || *p == '*' || *p == '^'
2624 || ((syntax & RE_BK_PLUS_QM)
2625 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2626 : (*p == '+' || *p == '?'))
2627 || ((syntax & RE_INTERVALS)
2628 && ((syntax & RE_NO_BK_BRACES)
2629 ? *p == '{'
2630 : (p[0] == '\\' && p[1] == '{'))))
2631 {
2632 /* Start building a new exactn. */
2633
2634 laststart = b;
2635
2636 BUF_PUSH_2 (exactn, 0);
2637 pending_exact = b - 1;
2638 }
2639
2640 BUF_PUSH (c);
2641 (*pending_exact)++;
2642 break;
2643 } /* switch (c) */
2644 } /* while p != pend */
2645
2646
2647 /* Through the pattern now. */
2648
2649 if (fixup_alt_jump)
2650 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2651
2652 if (!COMPILE_STACK_EMPTY)
2653 FREE_STACK_RETURN (REG_EPAREN);
2654
2655 /* If we don't want backtracking, force success
2656 the first time we reach the end of the compiled pattern. */
2657 if (syntax & RE_NO_POSIX_BACKTRACKING)
2658 BUF_PUSH (succeed);
2659
2660 free (compile_stack.stack);
2661
2662 /* We have succeeded; set the length of the buffer. */
2663 bufp->used = b - bufp->buffer;
2664
2665#ifdef DEBUG
2666 if (debug)
2667 {
2668 DEBUG_PRINT1 ("\nCompiled pattern: \n");
2669 print_compiled_pattern (bufp);
2670 }
2671#endif /* DEBUG */
2672
2673#ifndef MATCH_MAY_ALLOCATE
2674 /* Initialize the failure stack to the largest possible stack. This
2675 isn't necessary unless we're trying to avoid calling alloca in
2676 the search and match routines. */
2677 {
2678 int num_regs = bufp->re_nsub + 1;
2679
2680 /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2681 is strictly greater than re_max_failures, the largest possible stack
2682 is 2 * re_max_failures failure points. */
2683 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2684 {
2685 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2686
2687#ifdef emacs
2688 if (! fail_stack.stack)
2689 fail_stack.stack
2690 = (fail_stack_elt_t *) xmalloc (fail_stack.size
2691 * sizeof (fail_stack_elt_t));
2692 else
2693 fail_stack.stack
2694 = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2695 (fail_stack.size
2696 * sizeof (fail_stack_elt_t)));
2697#else /* not emacs */
2698 if (! fail_stack.stack)
2699 fail_stack.stack
2700 = (fail_stack_elt_t *) malloc (fail_stack.size
2701 * sizeof (fail_stack_elt_t));
2702 else
2703 fail_stack.stack
2704 = (fail_stack_elt_t *) realloc (fail_stack.stack,
2705 (fail_stack.size
2706 * sizeof (fail_stack_elt_t)));
2707#endif /* not emacs */
2708 }
2709
2710 regex_grow_registers (num_regs);
2711 }
2712#endif /* not MATCH_MAY_ALLOCATE */
2713
2714 return REG_NOERROR;
2715} /* regex_compile */
2716
2717/* Subroutines for `regex_compile'. */
2718
2719/* Store OP at LOC followed by two-byte integer parameter ARG. */
2720
2721static void
2722store_op1 (op, loc, arg)
2723 re_opcode_t op;
2724 unsigned char *loc;
2725 int arg;
2726{
2727 *loc = (unsigned char) op;
2728 STORE_NUMBER (loc + 1, arg);
2729}
2730
2731
2732/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
2733
2734static void
2735store_op2 (op, loc, arg1, arg2)
2736 re_opcode_t op;
2737 unsigned char *loc;
2738 int arg1, arg2;
2739{
2740 *loc = (unsigned char) op;
2741 STORE_NUMBER (loc + 1, arg1);
2742 STORE_NUMBER (loc + 3, arg2);
2743}
2744
2745
2746/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2747 for OP followed by two-byte integer parameter ARG. */
2748
2749static void
2750insert_op1 (op, loc, arg, end)
2751 re_opcode_t op;
2752 unsigned char *loc;
2753 int arg;
2754 unsigned char *end;
2755{
2756 register unsigned char *pfrom = end;
2757 register unsigned char *pto = end + 3;
2758
2759 while (pfrom != loc)
2760 *--pto = *--pfrom;
2761
2762 store_op1 (op, loc, arg);
2763}
2764
2765
2766/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
2767
2768static void
2769insert_op2 (op, loc, arg1, arg2, end)
2770 re_opcode_t op;
2771 unsigned char *loc;
2772 int arg1, arg2;
2773 unsigned char *end;
2774{
2775 register unsigned char *pfrom = end;
2776 register unsigned char *pto = end + 5;
2777
2778 while (pfrom != loc)
2779 *--pto = *--pfrom;
2780
2781 store_op2 (op, loc, arg1, arg2);
2782}
2783
2784
2785/* P points to just after a ^ in PATTERN. Return true if that ^ comes
2786 after an alternative or a begin-subexpression. We assume there is at
2787 least one character before the ^. */
2788
2789static boolean
2790at_begline_loc_p (pattern, p, syntax)
2791 const char *pattern, *p;
2792 reg_syntax_t syntax;
2793{
2794 const char *prev = p - 2;
2795 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2796
2797 return
2798 /* After a subexpression? */
2799 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2800 /* After an alternative? */
2801 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2802}
2803
2804
2805/* The dual of at_begline_loc_p. This one is for $. We assume there is
2806 at least one character after the $, i.e., `P < PEND'. */
2807
2808static boolean
2809at_endline_loc_p (p, pend, syntax)
2810 const char *p, *pend;
2811 int syntax;
2812{
2813 const char *next = p;
2814 boolean next_backslash = *next == '\\';
2815 const char *next_next = p + 1 < pend ? p + 1 : 0;
2816
2817 return
2818 /* Before a subexpression? */
2819 (syntax & RE_NO_BK_PARENS ? *next == ')'
2820 : next_backslash && next_next && *next_next == ')')
2821 /* Before an alternative? */
2822 || (syntax & RE_NO_BK_VBAR ? *next == '|'
2823 : next_backslash && next_next && *next_next == '|');
2824}
2825
2826
2827/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2828 false if it's not. */
2829
2830static boolean
2831group_in_compile_stack (compile_stack, regnum)
2832 compile_stack_type compile_stack;
2833 regnum_t regnum;
2834{
2835 int this_element;
2836
2837 for (this_element = compile_stack.avail - 1;
2838 this_element >= 0;
2839 this_element--)
2840 if (compile_stack.stack[this_element].regnum == regnum)
2841 return true;
2842
2843 return false;
2844}
2845
2846
2847/* Read the ending character of a range (in a bracket expression) from the
2848 uncompiled pattern *P_PTR (which ends at PEND). We assume the
2849 starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
2850 Then we set the translation of all bits between the starting and
2851 ending characters (inclusive) in the compiled pattern B.
2852
2853 Return an error code.
2854
2855 We use these short variable names so we can use the same macros as
2856 `regex_compile' itself. */
2857
2858static reg_errcode_t
2859compile_range (p_ptr, pend, translate, syntax, b)
2860 const char **p_ptr, *pend;
2861 RE_TRANSLATE_TYPE translate;
2862 reg_syntax_t syntax;
2863 unsigned char *b;
2864{
2865 unsigned this_char;
2866
2867 const char *p = *p_ptr;
2868 int range_start, range_end;
2869
2870 if (p == pend)
2871 return REG_ERANGE;
2872
2873 /* Even though the pattern is a signed `char *', we need to fetch
2874 with unsigned char *'s; if the high bit of the pattern character
2875 is set, the range endpoints will be negative if we fetch using a
2876 signed char *.
2877
2878 We also want to fetch the endpoints without translating them; the
2879 appropriate translation is done in the bit-setting loop below. */
2880 /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */
2881 range_start = ((const unsigned char *) p)[-2];
2882 range_end = ((const unsigned char *) p)[0];
2883
2884 /* Have to increment the pointer into the pattern string, so the
2885 caller isn't still at the ending character. */
2886 (*p_ptr)++;
2887
2888 /* If the start is after the end, the range is empty. */
2889 if (range_start > range_end)
2890 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2891
2892 /* Here we see why `this_char' has to be larger than an `unsigned
2893 char' -- the range is inclusive, so if `range_end' == 0xff
2894 (assuming 8-bit characters), we would otherwise go into an infinite
2895 loop, since all characters <= 0xff. */
2896 for (this_char = range_start; this_char <= range_end; this_char++)
2897 {
2898 SET_LIST_BIT (TRANSLATE (this_char));
2899 }
2900
2901 return REG_NOERROR;
2902}
2903
2904/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2905 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
2906 characters can start a string that matches the pattern. This fastmap
2907 is used by re_search to skip quickly over impossible starting points.
2908
2909 The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2910 area as BUFP->fastmap.
2911
2912 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2913 the pattern buffer.
2914
2915 Returns 0 if we succeed, -2 if an internal error. */
2916
2917int
2918re_compile_fastmap (bufp)
2919 struct re_pattern_buffer *bufp;
2920{
2921 int j, k;
2922#ifdef MATCH_MAY_ALLOCATE
2923 fail_stack_type fail_stack;
2924#endif
2925#ifndef REGEX_MALLOC
2926 char *destination;
2927#endif
2928 /* We don't push any register information onto the failure stack. */
2929 unsigned num_regs = 0;
2930
2931 register char *fastmap = bufp->fastmap;
2932 unsigned char *pattern = bufp->buffer;
2933 unsigned long size = bufp->used;
2934 unsigned char *p = pattern;
2935 register unsigned char *pend = pattern + size;
2936
2937 /* This holds the pointer to the failure stack, when
2938 it is allocated relocatably. */
2939#ifdef REL_ALLOC
2940 fail_stack_elt_t *failure_stack_ptr;
2941#endif
2942
2943 /* Assume that each path through the pattern can be null until
2944 proven otherwise. We set this false at the bottom of switch
2945 statement, to which we get only if a particular path doesn't
2946 match the empty string. */
2947 boolean path_can_be_null = true;
2948
2949 /* We aren't doing a `succeed_n' to begin with. */
2950 boolean succeed_n_p = false;
2951
2952 assert (fastmap != NULL && p != NULL);
2953
2954 INIT_FAIL_STACK ();
2955 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
2956 bufp->fastmap_accurate = 1; /* It will be when we're done. */
2957 bufp->can_be_null = 0;
2958
2959 while (1)
2960 {
2961 if (p == pend || *p == succeed)
2962 {
2963 /* We have reached the (effective) end of pattern. */
2964 if (!FAIL_STACK_EMPTY ())
2965 {
2966 bufp->can_be_null |= path_can_be_null;
2967
2968 /* Reset for next path. */
2969 path_can_be_null = true;
2970
2971 p = fail_stack.stack[--fail_stack.avail].pointer;
2972
2973 continue;
2974 }
2975 else
2976 break;
2977 }
2978
2979 /* We should never be about to go beyond the end of the pattern. */
2980 assert (p < pend);
2981
2982 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
2983 {
2984
2985 /* I guess the idea here is to simply not bother with a fastmap
2986 if a backreference is used, since it's too hard to figure out
2987 the fastmap for the corresponding group. Setting
2988 `can_be_null' stops `re_search_2' from using the fastmap, so
2989 that is all we do. */
2990 case duplicate:
2991 bufp->can_be_null = 1;
2992 goto done;
2993
2994
2995 /* Following are the cases which match a character. These end
2996 with `break'. */
2997
2998 case exactn:
2999 fastmap[p[1]] = 1;
3000 break;
3001
3002
3003 case charset:
3004 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3005 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3006 fastmap[j] = 1;
3007 break;
3008
3009
3010 case charset_not:
3011 /* Chars beyond end of map must be allowed. */
3012 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3013 fastmap[j] = 1;
3014
3015 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3016 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3017 fastmap[j] = 1;
3018 break;
3019
3020
3021 case wordchar:
3022 for (j = 0; j < (1 << BYTEWIDTH); j++)
3023 if (SYNTAX (j) == Sword)
3024 fastmap[j] = 1;
3025 break;
3026
3027
3028 case notwordchar:
3029 for (j = 0; j < (1 << BYTEWIDTH); j++)
3030 if (SYNTAX (j) != Sword)
3031 fastmap[j] = 1;
3032 break;
3033
3034
3035 case anychar:
3036 {
3037 int fastmap_newline = fastmap['\n'];
3038
3039 /* `.' matches anything ... */
3040 for (j = 0; j < (1 << BYTEWIDTH); j++)
3041 fastmap[j] = 1;
3042
3043 /* ... except perhaps newline. */
3044 if (!(bufp->syntax & RE_DOT_NEWLINE))
3045 fastmap['\n'] = fastmap_newline;
3046
3047 /* Return if we have already set `can_be_null'; if we have,
3048 then the fastmap is irrelevant. Something's wrong here. */
3049 else if (bufp->can_be_null)
3050 goto done;
3051
3052 /* Otherwise, have to check alternative paths. */
3053 break;
3054 }
3055
3056#ifdef emacs
3057 case syntaxspec:
3058 k = *p++;
3059 for (j = 0; j < (1 << BYTEWIDTH); j++)
3060 if (SYNTAX (j) == (enum syntaxcode) k)
3061 fastmap[j] = 1;
3062 break;
3063
3064
3065 case notsyntaxspec:
3066 k = *p++;
3067 for (j = 0; j < (1 << BYTEWIDTH); j++)
3068 if (SYNTAX (j) != (enum syntaxcode) k)
3069 fastmap[j] = 1;
3070 break;
3071
3072
3073 /* All cases after this match the empty string. These end with
3074 `continue'. */
3075
3076
3077 case before_dot:
3078 case at_dot:
3079 case after_dot:
3080 continue;
3081#endif /* emacs */
3082
3083
3084 case no_op:
3085 case begline:
3086 case endline:
3087 case begbuf:
3088 case endbuf:
3089 case wordbound:
3090 case notwordbound:
3091 case wordbeg:
3092 case wordend:
3093 case push_dummy_failure:
3094 continue;
3095
3096
3097 case jump_n:
3098 case pop_failure_jump:
3099 case maybe_pop_jump:
3100 case jump:
3101 case jump_past_alt:
3102 case dummy_failure_jump:
3103 EXTRACT_NUMBER_AND_INCR (j, p);
3104 p += j;
3105 if (j > 0)
3106 continue;
3107
3108 /* Jump backward implies we just went through the body of a
3109 loop and matched nothing. Opcode jumped to should be
3110 `on_failure_jump' or `succeed_n'. Just treat it like an
3111 ordinary jump. For a * loop, it has pushed its failure
3112 point already; if so, discard that as redundant. */
3113 if ((re_opcode_t) *p != on_failure_jump
3114 && (re_opcode_t) *p != succeed_n)
3115 continue;
3116
3117 p++;
3118 EXTRACT_NUMBER_AND_INCR (j, p);
3119 p += j;
3120
3121 /* If what's on the stack is where we are now, pop it. */
3122 if (!FAIL_STACK_EMPTY ()
3123 && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3124 fail_stack.avail--;
3125
3126 continue;
3127
3128
3129 case on_failure_jump:
3130 case on_failure_keep_string_jump:
3131 handle_on_failure_jump:
3132 EXTRACT_NUMBER_AND_INCR (j, p);
3133
3134 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3135 end of the pattern. We don't want to push such a point,
3136 since when we restore it above, entering the switch will
3137 increment `p' past the end of the pattern. We don't need
3138 to push such a point since we obviously won't find any more
3139 fastmap entries beyond `pend'. Such a pattern can match
3140 the null string, though. */
3141 if (p + j < pend)
3142 {
3143 if (!PUSH_PATTERN_OP (p + j, fail_stack))
3144 {
3145 RESET_FAIL_STACK ();
3146 return -2;
3147 }
3148 }
3149 else
3150 bufp->can_be_null = 1;
3151
3152 if (succeed_n_p)
3153 {
3154 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
3155 succeed_n_p = false;
3156 }
3157
3158 continue;
3159
3160
3161 case succeed_n:
3162 /* Get to the number of times to succeed. */
3163 p += 2;
3164
3165 /* Increment p past the n for when k != 0. */
3166 EXTRACT_NUMBER_AND_INCR (k, p);
3167 if (k == 0)
3168 {
3169 p -= 4;
3170 succeed_n_p = true; /* Spaghetti code alert. */
3171 goto handle_on_failure_jump;
3172 }
3173 continue;
3174
3175
3176 case set_number_at:
3177 p += 4;
3178 continue;
3179
3180
3181 case start_memory:
3182 case stop_memory:
3183 p += 2;
3184 continue;
3185
3186
3187 default:
3188 abort (); /* We have listed all the cases. */
3189 } /* switch *p++ */
3190
3191 /* Getting here means we have found the possible starting
3192 characters for one path of the pattern -- and that the empty
3193 string does not match. We need not follow this path further.
3194 Instead, look at the next alternative (remembered on the
3195 stack), or quit if no more. The test at the top of the loop
3196 does these things. */
3197 path_can_be_null = false;
3198 p = pend;
3199 } /* while p */
3200
3201 /* Set `can_be_null' for the last path (also the first path, if the
3202 pattern is empty). */
3203 bufp->can_be_null |= path_can_be_null;
3204
3205 done:
3206 RESET_FAIL_STACK ();
3207 return 0;
3208} /* re_compile_fastmap */
3209
3210/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3211 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
3212 this memory for recording register information. STARTS and ENDS
3213 must be allocated using the malloc library routine, and must each
3214 be at least NUM_REGS * sizeof (regoff_t) bytes long.
3215
3216 If NUM_REGS == 0, then subsequent matches should allocate their own
3217 register data.
3218
3219 Unless this function is called, the first search or match using
3220 PATTERN_BUFFER will allocate its own register data, without
3221 freeing the old data. */
3222
3223void
3224re_set_registers (bufp, regs, num_regs, starts, ends)
3225 struct re_pattern_buffer *bufp;
3226 struct re_registers *regs;
3227 unsigned num_regs;
3228 regoff_t *starts, *ends;
3229{
3230 if (num_regs)
3231 {
3232 bufp->regs_allocated = REGS_REALLOCATE;
3233 regs->num_regs = num_regs;
3234 regs->start = starts;
3235 regs->end = ends;
3236 }
3237 else
3238 {
3239 bufp->regs_allocated = REGS_UNALLOCATED;
3240 regs->num_regs = 0;
3241 regs->start = regs->end = (regoff_t *) 0;
3242 }
3243}
3244
3245/* Searching routines. */
3246
3247/* Like re_search_2, below, but only one string is specified, and
3248 doesn't let you say where to stop matching. */
3249
3250int
3251re_search (bufp, string, size, startpos, range, regs)
3252 struct re_pattern_buffer *bufp;
3253 const char *string;
3254 int size, startpos, range;
3255 struct re_registers *regs;
3256{
3257 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3258 regs, size);
3259}
3260
3261
3262/* Using the compiled pattern in BUFP->buffer, first tries to match the
3263 virtual concatenation of STRING1 and STRING2, starting first at index
3264 STARTPOS, then at STARTPOS + 1, and so on.
3265
3266 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3267
3268 RANGE is how far to scan while trying to match. RANGE = 0 means try
3269 only at STARTPOS; in general, the last start tried is STARTPOS +
3270 RANGE.
3271
3272 In REGS, return the indices of the virtual concatenation of STRING1
3273 and STRING2 that matched the entire BUFP->buffer and its contained
3274 subexpressions.
3275
3276 Do not consider matching one past the index STOP in the virtual
3277 concatenation of STRING1 and STRING2.
3278
3279 We return either the position in the strings at which the match was
3280 found, -1 if no match, or -2 if error (such as failure
3281 stack overflow). */
3282
3283int
3284re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3285 struct re_pattern_buffer *bufp;
3286 const char *string1, *string2;
3287 int size1, size2;
3288 int startpos;
3289 int range;
3290 struct re_registers *regs;
3291 int stop;
3292{
3293 int val;
3294 register char *fastmap = bufp->fastmap;
3295 register RE_TRANSLATE_TYPE translate = bufp->translate;
3296 int total_size = size1 + size2;
3297 int endpos = startpos + range;
3298 int anchored_start = 0;
3299
3300 /* Check for out-of-range STARTPOS. */
3301 if (startpos < 0 || startpos > total_size)
3302 return -1;
3303
3304 /* Fix up RANGE if it might eventually take us outside
3305 the virtual concatenation of STRING1 and STRING2.
3306 Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
3307 if (endpos < 0)
3308 range = 0 - startpos;
3309 else if (endpos > total_size)
3310 range = total_size - startpos;
3311
3312 /* If the search isn't to be a backwards one, don't waste time in a
3313 search for a pattern that must be anchored. */
3314 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3315 {
3316 if (startpos > 0)
3317 return -1;
3318 else
3319 range = 1;
3320 }
3321
3322#ifdef emacs
3323 /* In a forward search for something that starts with \=.
3324 don't keep searching past point. */
3325 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3326 {
3327 range = PT - startpos;
3328 if (range <= 0)
3329 return -1;
3330 }
3331#endif /* emacs */
3332
3333 /* Update the fastmap now if not correct already. */
3334 if (fastmap && !bufp->fastmap_accurate)
3335 if (re_compile_fastmap (bufp) == -2)
3336 return -2;
3337
3338 /* See whether the pattern is anchored. */
3339 if (bufp->buffer[0] == begline)
3340 anchored_start = 1;
3341
3342 /* Loop through the string, looking for a place to start matching. */
3343 for (;;)
3344 {
3345 /* If the pattern is anchored,
3346 skip quickly past places we cannot match.
3347 We don't bother to treat startpos == 0 specially
3348 because that case doesn't repeat. */
3349 if (anchored_start && startpos > 0)
3350 {
3351 if (! (bufp->newline_anchor
3352 && ((startpos <= size1 ? string1[startpos - 1]
3353 : string2[startpos - size1 - 1])
3354 == '\n')))
3355 goto advance;
3356 }
3357
3358 /* If a fastmap is supplied, skip quickly over characters that
3359 cannot be the start of a match. If the pattern can match the
3360 null string, however, we don't need to skip characters; we want
3361 the first null string. */
3362 if (fastmap && startpos < total_size && !bufp->can_be_null)
3363 {
3364 if (range > 0) /* Searching forwards. */
3365 {
3366 register const char *d;
3367 register int lim = 0;
3368 int irange = range;
3369
3370 if (startpos < size1 && startpos + range >= size1)
3371 lim = range - (size1 - startpos);
3372
3373 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3374
3375 /* Written out as an if-else to avoid testing `translate'
3376 inside the loop. */
3377 if (translate)
3378 while (range > lim
3379 && !fastmap[(unsigned char)
3380 translate[(unsigned char) *d++]])
3381 range--;
3382 else
3383 while (range > lim && !fastmap[(unsigned char) *d++])
3384 range--;
3385
3386 startpos += irange - range;
3387 }
3388 else /* Searching backwards. */
3389 {
3390 register char c = (size1 == 0 || startpos >= size1
3391 ? string2[startpos - size1]
3392 : string1[startpos]);
3393
3394 if (!fastmap[(unsigned char) TRANSLATE (c)])
3395 goto advance;
3396 }
3397 }
3398
3399 /* If can't match the null string, and that's all we have left, fail. */
3400 if (range >= 0 && startpos == total_size && fastmap
3401 && !bufp->can_be_null)
3402 return -1;
3403
3404 val = re_match_2_internal (bufp, string1, size1, string2, size2,
3405 startpos, regs, stop);
3406#ifndef REGEX_MALLOC
3407#ifdef C_ALLOCA
3408 alloca (0);
3409#endif
3410#endif
3411
3412 if (val >= 0)
3413 return startpos;
3414
3415 if (val == -2)
3416 return -2;
3417
3418 advance:
3419 if (!range)
3420 break;
3421 else if (range > 0)
3422 {
3423 range--;
3424 startpos++;
3425 }
3426 else
3427 {
3428 range++;
3429 startpos--;
3430 }
3431 }
3432 return -1;
3433} /* re_search_2 */
3434
3435/* Declarations and macros for re_match_2. */
3436
3437static int bcmp_translate ();
3438static boolean alt_match_null_string_p (),
3439 common_op_match_null_string_p (),
3440 group_match_null_string_p ();
3441
3442/* This converts PTR, a pointer into one of the search strings `string1'
3443 and `string2' into an offset from the beginning of that string. */
3444#define POINTER_TO_OFFSET(ptr) \
3445 (FIRST_STRING_P (ptr) \
3446 ? ((regoff_t) ((ptr) - string1)) \
3447 : ((regoff_t) ((ptr) - string2 + size1)))
3448
3449/* Macros for dealing with the split strings in re_match_2. */
3450
3451#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
3452
3453/* Call before fetching a character with *d. This switches over to
3454 string2 if necessary. */
3455#define PREFETCH() \
3456 while (d == dend) \
3457 { \
3458 /* End of string2 => fail. */ \
3459 if (dend == end_match_2) \
3460 goto fail; \
3461 /* End of string1 => advance to string2. */ \
3462 d = string2; \
3463 dend = end_match_2; \
3464 }
3465
3466
3467/* Test if at very beginning or at very end of the virtual concatenation
3468 of `string1' and `string2'. If only one string, it's `string2'. */
3469#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3470#define AT_STRINGS_END(d) ((d) == end2)
3471
3472
3473/* Test if D points to a character which is word-constituent. We have
3474 two special cases to check for: if past the end of string1, look at
3475 the first character in string2; and if before the beginning of
3476 string2, look at the last character in string1. */
3477#define WORDCHAR_P(d) \
3478 (SYNTAX ((d) == end1 ? *string2 \
3479 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
3480 == Sword)
3481
3482/* Disabled due to a compiler bug -- see comment at case wordbound */
3483#if 0
3484/* Test if the character before D and the one at D differ with respect
3485 to being word-constituent. */
3486#define AT_WORD_BOUNDARY(d) \
3487 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
3488 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3489#endif
3490
3491/* Free everything we malloc. */
3492#ifdef MATCH_MAY_ALLOCATE
3493#define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
3494#define FREE_VARIABLES() \
3495 do { \
3496 REGEX_FREE_STACK (fail_stack.stack); \
3497 FREE_VAR (regstart); \
3498 FREE_VAR (regend); \
3499 FREE_VAR (old_regstart); \
3500 FREE_VAR (old_regend); \
3501 FREE_VAR (best_regstart); \
3502 FREE_VAR (best_regend); \
3503 FREE_VAR (reg_info); \
3504 FREE_VAR (reg_dummy); \
3505 FREE_VAR (reg_info_dummy); \
3506 } while (0)
3507#else
3508#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
3509#endif /* not MATCH_MAY_ALLOCATE */
3510
3511/* These values must meet several constraints. They must not be valid
3512 register values; since we have a limit of 255 registers (because
3513 we use only one byte in the pattern for the register number), we can
3514 use numbers larger than 255. They must differ by 1, because of
3515 NUM_FAILURE_ITEMS above. And the value for the lowest register must
3516 be larger than the value for the highest register, so we do not try
3517 to actually save any registers when none are active. */
3518#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3519#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3520
3521/* Matching routines. */
3522
3523#ifndef emacs /* Emacs never uses this. */
3524/* re_match is like re_match_2 except it takes only a single string. */
3525
3526int
3527re_match (bufp, string, size, pos, regs)
3528 struct re_pattern_buffer *bufp;
3529 const char *string;
3530 int size, pos;
3531 struct re_registers *regs;
3532{
3533 int result = re_match_2_internal (bufp, NULL, 0, string, size,
3534 pos, regs, size);
3535 alloca (0);
3536 return result;
3537}
3538#endif /* not emacs */
3539
3540
3541/* re_match_2 matches the compiled pattern in BUFP against the
3542 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3543 and SIZE2, respectively). We start matching at POS, and stop
3544 matching at STOP.
3545
3546 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3547 store offsets for the substring each group matched in REGS. See the
3548 documentation for exactly how many groups we fill.
3549
3550 We return -1 if no match, -2 if an internal error (such as the
3551 failure stack overflowing). Otherwise, we return the length of the
3552 matched substring. */
3553
3554int
3555re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3556 struct re_pattern_buffer *bufp;
3557 const char *string1, *string2;
3558 int size1, size2;
3559 int pos;
3560 struct re_registers *regs;
3561 int stop;
3562{
3563 int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3564 pos, regs, stop);
3565 alloca (0);
3566 return result;
3567}
3568
3569/* This is a separate function so that we can force an alloca cleanup
3570 afterwards. */
3571static int
3572re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3573 struct re_pattern_buffer *bufp;
3574 const char *string1, *string2;
3575 int size1, size2;
3576 int pos;
3577 struct re_registers *regs;
3578 int stop;
3579{
3580 /* General temporaries. */
3581 int mcnt;
3582 unsigned char *p1;
3583
3584 /* Just past the end of the corresponding string. */
3585 const char *end1, *end2;
3586
3587 /* Pointers into string1 and string2, just past the last characters in
3588 each to consider matching. */
3589 const char *end_match_1, *end_match_2;
3590
3591 /* Where we are in the data, and the end of the current string. */
3592 const char *d, *dend;
3593
3594 /* Where we are in the pattern, and the end of the pattern. */
3595 unsigned char *p = bufp->buffer;
3596 register unsigned char *pend = p + bufp->used;
3597
3598 /* Mark the opcode just after a start_memory, so we can test for an
3599 empty subpattern when we get to the stop_memory. */
3600 unsigned char *just_past_start_mem = 0;
3601
3602 /* We use this to map every character in the string. */
3603 RE_TRANSLATE_TYPE translate = bufp->translate;
3604
3605 /* Failure point stack. Each place that can handle a failure further
3606 down the line pushes a failure point on this stack. It consists of
3607 restart, regend, and reg_info for all registers corresponding to
3608 the subexpressions we're currently inside, plus the number of such
3609 registers, and, finally, two char *'s. The first char * is where
3610 to resume scanning the pattern; the second one is where to resume
3611 scanning the strings. If the latter is zero, the failure point is
3612 a ``dummy''; if a failure happens and the failure point is a dummy,
3613 it gets discarded and the next next one is tried. */
3614#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
3615 fail_stack_type fail_stack;
3616#endif
3617#ifdef DEBUG
3618 static unsigned failure_id = 0;
3619 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3620#endif
3621
3622 /* This holds the pointer to the failure stack, when
3623 it is allocated relocatably. */
3624#ifdef REL_ALLOC
3625 fail_stack_elt_t *failure_stack_ptr;
3626#endif
3627
3628 /* We fill all the registers internally, independent of what we
3629 return, for use in backreferences. The number here includes
3630 an element for register zero. */
3631 unsigned num_regs = bufp->re_nsub + 1;
3632
3633 /* The currently active registers. */
3634 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3635 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3636
3637 /* Information on the contents of registers. These are pointers into
3638 the input strings; they record just what was matched (on this
3639 attempt) by a subexpression part of the pattern, that is, the
3640 regnum-th regstart pointer points to where in the pattern we began
3641 matching and the regnum-th regend points to right after where we
3642 stopped matching the regnum-th subexpression. (The zeroth register
3643 keeps track of what the whole pattern matches.) */
3644#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
3645 const char **regstart, **regend;
3646#endif
3647
3648 /* If a group that's operated upon by a repetition operator fails to
3649 match anything, then the register for its start will need to be
3650 restored because it will have been set to wherever in the string we
3651 are when we last see its open-group operator. Similarly for a
3652 register's end. */
3653#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
3654 const char **old_regstart, **old_regend;
3655#endif
3656
3657 /* The is_active field of reg_info helps us keep track of which (possibly
3658 nested) subexpressions we are currently in. The matched_something
3659 field of reg_info[reg_num] helps us tell whether or not we have
3660 matched any of the pattern so far this time through the reg_num-th
3661 subexpression. These two fields get reset each time through any
3662 loop their register is in. */
3663#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
3664 register_info_type *reg_info;
3665#endif
3666
3667 /* The following record the register info as found in the above
3668 variables when we find a match better than any we've seen before.
3669 This happens as we backtrack through the failure points, which in
3670 turn happens only if we have not yet matched the entire string. */
3671 unsigned best_regs_set = false;
3672#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
3673 const char **best_regstart, **best_regend;
3674#endif
3675
3676 /* Logically, this is `best_regend[0]'. But we don't want to have to
3677 allocate space for that if we're not allocating space for anything
3678 else (see below). Also, we never need info about register 0 for
3679 any of the other register vectors, and it seems rather a kludge to
3680 treat `best_regend' differently than the rest. So we keep track of
3681 the end of the best match so far in a separate variable. We
3682 initialize this to NULL so that when we backtrack the first time
3683 and need to test it, it's not garbage. */
3684 const char *match_end = NULL;
3685
3686 /* This helps SET_REGS_MATCHED avoid doing redundant work. */
3687 int set_regs_matched_done = 0;
3688
3689 /* Used when we pop values we don't care about. */
3690#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
3691 const char **reg_dummy;
3692 register_info_type *reg_info_dummy;
3693#endif
3694
3695#ifdef DEBUG
3696 /* Counts the total number of registers pushed. */
3697 unsigned num_regs_pushed = 0;
3698#endif
3699
3700 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3701
3702 INIT_FAIL_STACK ();
3703
3704#ifdef MATCH_MAY_ALLOCATE
3705 /* Do not bother to initialize all the register variables if there are
3706 no groups in the pattern, as it takes a fair amount of time. If
3707 there are groups, we include space for register 0 (the whole
3708 pattern), even though we never use it, since it simplifies the
3709 array indexing. We should fix this. */
3710 if (bufp->re_nsub)
3711 {
3712 regstart = REGEX_TALLOC (num_regs, const char *);
3713 regend = REGEX_TALLOC (num_regs, const char *);
3714 old_regstart = REGEX_TALLOC (num_regs, const char *);
3715 old_regend = REGEX_TALLOC (num_regs, const char *);
3716 best_regstart = REGEX_TALLOC (num_regs, const char *);
3717 best_regend = REGEX_TALLOC (num_regs, const char *);
3718 reg_info = REGEX_TALLOC (num_regs, register_info_type);
3719 reg_dummy = REGEX_TALLOC (num_regs, const char *);
3720 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3721
3722 if (!(regstart && regend && old_regstart && old_regend && reg_info
3723 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3724 {
3725 FREE_VARIABLES ();
3726 return -2;
3727 }
3728 }
3729 else
3730 {
3731 /* We must initialize all our variables to NULL, so that
3732 `FREE_VARIABLES' doesn't try to free them. */
3733 regstart = regend = old_regstart = old_regend = best_regstart
3734 = best_regend = reg_dummy = NULL;
3735 reg_info = reg_info_dummy = (register_info_type *) NULL;
3736 }
3737#endif /* MATCH_MAY_ALLOCATE */
3738
3739 /* The starting position is bogus. */
3740 if (pos < 0 || pos > size1 + size2)
3741 {
3742 FREE_VARIABLES ();
3743 return -1;
3744 }
3745
3746 /* Initialize subexpression text positions to -1 to mark ones that no
3747 start_memory/stop_memory has been seen for. Also initialize the
3748 register information struct. */
3749 for (mcnt = 1; mcnt < num_regs; mcnt++)
3750 {
3751 regstart[mcnt] = regend[mcnt]
3752 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3753
3754 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3755 IS_ACTIVE (reg_info[mcnt]) = 0;
3756 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3757 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3758 }
3759
3760 /* We move `string1' into `string2' if the latter's empty -- but not if
3761 `string1' is null. */
3762 if (size2 == 0 && string1 != NULL)
3763 {
3764 string2 = string1;
3765 size2 = size1;
3766 string1 = 0;
3767 size1 = 0;
3768 }
3769 end1 = string1 + size1;
3770 end2 = string2 + size2;
3771
3772 /* Compute where to stop matching, within the two strings. */
3773 if (stop <= size1)
3774 {
3775 end_match_1 = string1 + stop;
3776 end_match_2 = string2;
3777 }
3778 else
3779 {
3780 end_match_1 = end1;
3781 end_match_2 = string2 + stop - size1;
3782 }
3783
3784 /* `p' scans through the pattern as `d' scans through the data.
3785 `dend' is the end of the input string that `d' points within. `d'
3786 is advanced into the following input string whenever necessary, but
3787 this happens before fetching; therefore, at the beginning of the
3788 loop, `d' can be pointing at the end of a string, but it cannot
3789 equal `string2'. */
3790 if (size1 > 0 && pos <= size1)
3791 {
3792 d = string1 + pos;
3793 dend = end_match_1;
3794 }
3795 else
3796 {
3797 d = string2 + pos - size1;
3798 dend = end_match_2;
3799 }
3800
3801 DEBUG_PRINT1 ("The compiled pattern is: ");
3802 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3803 DEBUG_PRINT1 ("The string to match is: `");
3804 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3805 DEBUG_PRINT1 ("'\n");
3806
3807 /* This loops over pattern commands. It exits by returning from the
3808 function if the match is complete, or it drops through if the match
3809 fails at this starting point in the input data. */
3810 for (;;)
3811 {
3812 DEBUG_PRINT2 ("\n0x%x: ", p);
3813
3814 if (p == pend)
3815 { /* End of pattern means we might have succeeded. */
3816 DEBUG_PRINT1 ("end of pattern ... ");
3817
3818 /* If we haven't matched the entire string, and we want the
3819 longest match, try backtracking. */
3820 if (d != end_match_2)
3821 {
3822 /* 1 if this match ends in the same string (string1 or string2)
3823 as the best previous match. */
3824 boolean same_str_p = (FIRST_STRING_P (match_end)
3825 == MATCHING_IN_FIRST_STRING);
3826 /* 1 if this match is the best seen so far. */
3827 boolean best_match_p;
3828
3829 /* AIX compiler got confused when this was combined
3830 with the previous declaration. */
3831 if (same_str_p)
3832 best_match_p = d > match_end;
3833 else
3834 best_match_p = !MATCHING_IN_FIRST_STRING;
3835
3836 DEBUG_PRINT1 ("backtracking.\n");
3837
3838 if (!FAIL_STACK_EMPTY ())
3839 { /* More failure points to try. */
3840
3841 /* If exceeds best match so far, save it. */
3842 if (!best_regs_set || best_match_p)
3843 {
3844 best_regs_set = true;
3845 match_end = d;
3846
3847 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3848
3849 for (mcnt = 1; mcnt < num_regs; mcnt++)
3850 {
3851 best_regstart[mcnt] = regstart[mcnt];
3852 best_regend[mcnt] = regend[mcnt];
3853 }
3854 }
3855 goto fail;
3856 }
3857
3858 /* If no failure points, don't restore garbage. And if
3859 last match is real best match, don't restore second
3860 best one. */
3861 else if (best_regs_set && !best_match_p)
3862 {
3863 restore_best_regs:
3864 /* Restore best match. It may happen that `dend ==
3865 end_match_1' while the restored d is in string2.
3866 For example, the pattern `x.*y.*z' against the
3867 strings `x-' and `y-z-', if the two strings are
3868 not consecutive in memory. */
3869 DEBUG_PRINT1 ("Restoring best registers.\n");
3870
3871 d = match_end;
3872 dend = ((d >= string1 && d <= end1)
3873 ? end_match_1 : end_match_2);
3874
3875 for (mcnt = 1; mcnt < num_regs; mcnt++)
3876 {
3877 regstart[mcnt] = best_regstart[mcnt];
3878 regend[mcnt] = best_regend[mcnt];
3879 }
3880 }
3881 } /* d != end_match_2 */
3882
3883 succeed_label:
3884 DEBUG_PRINT1 ("Accepting match.\n");
3885
3886 /* If caller wants register contents data back, do it. */
3887 if (regs && !bufp->no_sub)
3888 {
3889 /* Have the register data arrays been allocated? */
3890 if (bufp->regs_allocated == REGS_UNALLOCATED)
3891 { /* No. So allocate them with malloc. We need one
3892 extra element beyond `num_regs' for the `-1' marker
3893 GNU code uses. */
3894 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3895 regs->start = TALLOC (regs->num_regs, regoff_t);
3896 regs->end = TALLOC (regs->num_regs, regoff_t);
3897 if (regs->start == NULL || regs->end == NULL)
3898 {
3899 FREE_VARIABLES ();
3900 return -2;
3901 }
3902 bufp->regs_allocated = REGS_REALLOCATE;
3903 }
3904 else if (bufp->regs_allocated == REGS_REALLOCATE)
3905 { /* Yes. If we need more elements than were already
3906 allocated, reallocate them. If we need fewer, just
3907 leave it alone. */
3908 if (regs->num_regs < num_regs + 1)
3909 {
3910 regs->num_regs = num_regs + 1;
3911 RETALLOC (regs->start, regs->num_regs, regoff_t);
3912 RETALLOC (regs->end, regs->num_regs, regoff_t);
3913 if (regs->start == NULL || regs->end == NULL)
3914 {
3915 FREE_VARIABLES ();
3916 return -2;
3917 }
3918 }
3919 }
3920 else
3921 {
3922 /* These braces fend off a "empty body in an else-statement"
3923 warning under GCC when assert expands to nothing. */
3924 assert (bufp->regs_allocated == REGS_FIXED);
3925 }
3926
3927 /* Convert the pointer data in `regstart' and `regend' to
3928 indices. Register zero has to be set differently,
3929 since we haven't kept track of any info for it. */
3930 if (regs->num_regs > 0)
3931 {
3932 regs->start[0] = pos;
3933 regs->end[0] = (MATCHING_IN_FIRST_STRING
3934 ? ((regoff_t) (d - string1))
3935 : ((regoff_t) (d - string2 + size1)));
3936 }
3937
3938 /* Go through the first `min (num_regs, regs->num_regs)'
3939 registers, since that is all we initialized. */
3940 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3941 {
3942 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3943 regs->start[mcnt] = regs->end[mcnt] = -1;
3944 else
3945 {
3946 regs->start[mcnt]
3947 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
3948 regs->end[mcnt]
3949 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
3950 }
3951 }
3952
3953 /* If the regs structure we return has more elements than
3954 were in the pattern, set the extra elements to -1. If
3955 we (re)allocated the registers, this is the case,
3956 because we always allocate enough to have at least one
3957 -1 at the end. */
3958 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3959 regs->start[mcnt] = regs->end[mcnt] = -1;
3960 } /* regs && !bufp->no_sub */
3961
3962 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3963 nfailure_points_pushed, nfailure_points_popped,
3964 nfailure_points_pushed - nfailure_points_popped);
3965 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3966
3967 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3968 ? string1
3969 : string2 - size1);
3970
3971 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3972
3973 FREE_VARIABLES ();
3974 return mcnt;
3975 }
3976
3977 /* Otherwise match next pattern command. */
3978 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3979 {
3980 /* Ignore these. Used to ignore the n of succeed_n's which
3981 currently have n == 0. */
3982 case no_op:
3983 DEBUG_PRINT1 ("EXECUTING no_op.\n");
3984 break;
3985
3986 case succeed:
3987 DEBUG_PRINT1 ("EXECUTING succeed.\n");
3988 goto succeed_label;
3989
3990 /* Match the next n pattern characters exactly. The following
3991 byte in the pattern defines n, and the n bytes after that
3992 are the characters to match. */
3993 case exactn:
3994 mcnt = *p++;
3995 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3996
3997 /* This is written out as an if-else so we don't waste time
3998 testing `translate' inside the loop. */
3999 if (translate)
4000 {
4001 do
4002 {
4003 PREFETCH ();
4004 if ((unsigned char) translate[(unsigned char) *d++]
4005 != (unsigned char) *p++)
4006 goto fail;
4007 }
4008 while (--mcnt);
4009 }
4010 else
4011 {
4012 do
4013 {
4014 PREFETCH ();
4015 if (*d++ != (char) *p++) goto fail;
4016 }
4017 while (--mcnt);
4018 }
4019 SET_REGS_MATCHED ();
4020 break;
4021
4022
4023 /* Match any character except possibly a newline or a null. */
4024 case anychar:
4025 DEBUG_PRINT1 ("EXECUTING anychar.\n");
4026
4027 PREFETCH ();
4028
4029 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4030 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4031 goto fail;
4032
4033 SET_REGS_MATCHED ();
4034 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
4035 d++;
4036 break;
4037
4038
4039 case charset:
4040 case charset_not:
4041 {
4042 register unsigned char c;
4043 boolean not = (re_opcode_t) *(p - 1) == charset_not;
4044
4045 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4046
4047 PREFETCH ();
4048 c = TRANSLATE (*d); /* The character to match. */
4049
4050 /* Cast to `unsigned' instead of `unsigned char' in case the
4051 bit list is a full 32 bytes long. */
4052 if (c < (unsigned) (*p * BYTEWIDTH)
4053 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4054 not = !not;
4055
4056 p += 1 + *p;
4057
4058 if (!not) goto fail;
4059
4060 SET_REGS_MATCHED ();
4061 d++;
4062 break;
4063 }
4064
4065
4066 /* The beginning of a group is represented by start_memory.
4067 The arguments are the register number in the next byte, and the
4068 number of groups inner to this one in the next. The text
4069 matched within the group is recorded (in the internal
4070 registers data structure) under the register number. */
4071 case start_memory:
4072 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4073
4074 /* Find out if this group can match the empty string. */
4075 p1 = p; /* To send to group_match_null_string_p. */
4076
4077 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4078 REG_MATCH_NULL_STRING_P (reg_info[*p])
4079 = group_match_null_string_p (&p1, pend, reg_info);
4080
4081 /* Save the position in the string where we were the last time
4082 we were at this open-group operator in case the group is
4083 operated upon by a repetition operator, e.g., with `(a*)*b'
4084 against `ab'; then we want to ignore where we are now in
4085 the string in case this attempt to match fails. */
4086 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4087 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4088 : regstart[*p];
4089 DEBUG_PRINT2 (" old_regstart: %d\n",
4090 POINTER_TO_OFFSET (old_regstart[*p]));
4091
4092 regstart[*p] = d;
4093 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4094
4095 IS_ACTIVE (reg_info[*p]) = 1;
4096 MATCHED_SOMETHING (reg_info[*p]) = 0;
4097
4098 /* Clear this whenever we change the register activity status. */
4099 set_regs_matched_done = 0;
4100
4101 /* This is the new highest active register. */
4102 highest_active_reg = *p;
4103
4104 /* If nothing was active before, this is the new lowest active
4105 register. */
4106 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4107 lowest_active_reg = *p;
4108
4109 /* Move past the register number and inner group count. */
4110 p += 2;
4111 just_past_start_mem = p;
4112
4113 break;
4114
4115
4116 /* The stop_memory opcode represents the end of a group. Its
4117 arguments are the same as start_memory's: the register
4118 number, and the number of inner groups. */
4119 case stop_memory:
4120 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4121
4122 /* We need to save the string position the last time we were at
4123 this close-group operator in case the group is operated
4124 upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4125 against `aba'; then we want to ignore where we are now in
4126 the string in case this attempt to match fails. */
4127 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4128 ? REG_UNSET (regend[*p]) ? d : regend[*p]
4129 : regend[*p];
4130 DEBUG_PRINT2 (" old_regend: %d\n",
4131 POINTER_TO_OFFSET (old_regend[*p]));
4132
4133 regend[*p] = d;
4134 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4135
4136 /* This register isn't active anymore. */
4137 IS_ACTIVE (reg_info[*p]) = 0;
4138
4139 /* Clear this whenever we change the register activity status. */
4140 set_regs_matched_done = 0;
4141
4142 /* If this was the only register active, nothing is active
4143 anymore. */
4144 if (lowest_active_reg == highest_active_reg)
4145 {
4146 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4147 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4148 }
4149 else
4150 { /* We must scan for the new highest active register, since
4151 it isn't necessarily one less than now: consider
4152 (a(b)c(d(e)f)g). When group 3 ends, after the f), the
4153 new highest active register is 1. */
4154 unsigned char r = *p - 1;
4155 while (r > 0 && !IS_ACTIVE (reg_info[r]))
4156 r--;
4157
4158 /* If we end up at register zero, that means that we saved
4159 the registers as the result of an `on_failure_jump', not
4160 a `start_memory', and we jumped to past the innermost
4161 `stop_memory'. For example, in ((.)*) we save
4162 registers 1 and 2 as a result of the *, but when we pop
4163 back to the second ), we are at the stop_memory 1.
4164 Thus, nothing is active. */
4165 if (r == 0)
4166 {
4167 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4168 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4169 }
4170 else
4171 highest_active_reg = r;
4172 }
4173
4174 /* If just failed to match something this time around with a
4175 group that's operated on by a repetition operator, try to
4176 force exit from the ``loop'', and restore the register
4177 information for this group that we had before trying this
4178 last match. */
4179 if ((!MATCHED_SOMETHING (reg_info[*p])
4180 || just_past_start_mem == p - 1)
4181 && (p + 2) < pend)
4182 {
4183 boolean is_a_jump_n = false;
4184
4185 p1 = p + 2;
4186 mcnt = 0;
4187 switch ((re_opcode_t) *p1++)
4188 {
4189 case jump_n:
4190 is_a_jump_n = true;
4191 case pop_failure_jump:
4192 case maybe_pop_jump:
4193 case jump:
4194 case dummy_failure_jump:
4195 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4196 if (is_a_jump_n)
4197 p1 += 2;
4198 break;
4199
4200 default:
4201 /* do nothing */ ;
4202 }
4203 p1 += mcnt;
4204
4205 /* If the next operation is a jump backwards in the pattern
4206 to an on_failure_jump right before the start_memory
4207 corresponding to this stop_memory, exit from the loop
4208 by forcing a failure after pushing on the stack the
4209 on_failure_jump's jump in the pattern, and d. */
4210 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4211 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4212 {
4213 /* If this group ever matched anything, then restore
4214 what its registers were before trying this last
4215 failed match, e.g., with `(a*)*b' against `ab' for
4216 regstart[1], and, e.g., with `((a*)*(b*)*)*'
4217 against `aba' for regend[3].
4218
4219 Also restore the registers for inner groups for,
4220 e.g., `((a*)(b*))*' against `aba' (register 3 would
4221 otherwise get trashed). */
4222
4223 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4224 {
4225 unsigned r;
4226
4227 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4228
4229 /* Restore this and inner groups' (if any) registers. */
4230 for (r = *p; r < *p + *(p + 1); r++)
4231 {
4232 regstart[r] = old_regstart[r];
4233
4234 /* xx why this test? */
4235 if (old_regend[r] >= regstart[r])
4236 regend[r] = old_regend[r];
4237 }
4238 }
4239 p1++;
4240 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4241 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4242
4243 goto fail;
4244 }
4245 }
4246
4247 /* Move past the register number and the inner group count. */
4248 p += 2;
4249 break;
4250
4251
4252 /* \<digit> has been turned into a `duplicate' command which is
4253 followed by the numeric value of <digit> as the register number. */
4254 case duplicate:
4255 {
4256 register const char *d2, *dend2;
4257 int regno = *p++; /* Get which register to match against. */
4258 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4259
4260 /* Can't back reference a group which we've never matched. */
4261 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4262 goto fail;
4263
4264 /* Where in input to try to start matching. */
4265 d2 = regstart[regno];
4266
4267 /* Where to stop matching; if both the place to start and
4268 the place to stop matching are in the same string, then
4269 set to the place to stop, otherwise, for now have to use
4270 the end of the first string. */
4271
4272 dend2 = ((FIRST_STRING_P (regstart[regno])
4273 == FIRST_STRING_P (regend[regno]))
4274 ? regend[regno] : end_match_1);
4275 for (;;)
4276 {
4277 /* If necessary, advance to next segment in register
4278 contents. */
4279 while (d2 == dend2)
4280 {
4281 if (dend2 == end_match_2) break;
4282 if (dend2 == regend[regno]) break;
4283
4284 /* End of string1 => advance to string2. */
4285 d2 = string2;
4286 dend2 = regend[regno];
4287 }
4288 /* At end of register contents => success */
4289 if (d2 == dend2) break;
4290
4291 /* If necessary, advance to next segment in data. */
4292 PREFETCH ();
4293
4294 /* How many characters left in this segment to match. */
4295 mcnt = dend - d;
4296
4297 /* Want how many consecutive characters we can match in
4298 one shot, so, if necessary, adjust the count. */
4299 if (mcnt > dend2 - d2)
4300 mcnt = dend2 - d2;
4301
4302 /* Compare that many; failure if mismatch, else move
4303 past them. */
4304 if (translate
4305 ? bcmp_translate (d, d2, mcnt, translate)
4306 : bcmp (d, d2, mcnt))
4307 goto fail;
4308 d += mcnt, d2 += mcnt;
4309
4310 /* Do this because we've match some characters. */
4311 SET_REGS_MATCHED ();
4312 }
4313 }
4314 break;
4315
4316
4317 /* begline matches the empty string at the beginning of the string
4318 (unless `not_bol' is set in `bufp'), and, if
4319 `newline_anchor' is set, after newlines. */
4320 case begline:
4321 DEBUG_PRINT1 ("EXECUTING begline.\n");
4322
4323 if (AT_STRINGS_BEG (d))
4324 {
4325 if (!bufp->not_bol) break;
4326 }
4327 else if (d[-1] == '\n' && bufp->newline_anchor)
4328 {
4329 break;
4330 }
4331 /* In all other cases, we fail. */
4332 goto fail;
4333
4334
4335 /* endline is the dual of begline. */
4336 case endline:
4337 DEBUG_PRINT1 ("EXECUTING endline.\n");
4338
4339 if (AT_STRINGS_END (d))
4340 {
4341 if (!bufp->not_eol) break;
4342 }
4343
4344 /* We have to ``prefetch'' the next character. */
4345 else if ((d == end1 ? *string2 : *d) == '\n'
4346 && bufp->newline_anchor)
4347 {
4348 break;
4349 }
4350 goto fail;
4351
4352
4353 /* Match at the very beginning of the data. */
4354 case begbuf:
4355 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4356 if (AT_STRINGS_BEG (d))
4357 break;
4358 goto fail;
4359
4360
4361 /* Match at the very end of the data. */
4362 case endbuf:
4363 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4364 if (AT_STRINGS_END (d))
4365 break;
4366 goto fail;
4367
4368
4369 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
4370 pushes NULL as the value for the string on the stack. Then
4371 `pop_failure_point' will keep the current value for the
4372 string, instead of restoring it. To see why, consider
4373 matching `foo\nbar' against `.*\n'. The .* matches the foo;
4374 then the . fails against the \n. But the next thing we want
4375 to do is match the \n against the \n; if we restored the
4376 string value, we would be back at the foo.
4377
4378 Because this is used only in specific cases, we don't need to
4379 check all the things that `on_failure_jump' does, to make
4380 sure the right things get saved on the stack. Hence we don't
4381 share its code. The only reason to push anything on the
4382 stack at all is that otherwise we would have to change
4383 `anychar's code to do something besides goto fail in this
4384 case; that seems worse than this. */
4385 case on_failure_keep_string_jump:
4386 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4387
4388 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4389 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4390
4391 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4392 break;
4393
4394
4395 /* Uses of on_failure_jump:
4396
4397 Each alternative starts with an on_failure_jump that points
4398 to the beginning of the next alternative. Each alternative
4399 except the last ends with a jump that in effect jumps past
4400 the rest of the alternatives. (They really jump to the
4401 ending jump of the following alternative, because tensioning
4402 these jumps is a hassle.)
4403
4404 Repeats start with an on_failure_jump that points past both
4405 the repetition text and either the following jump or
4406 pop_failure_jump back to this on_failure_jump. */
4407 case on_failure_jump:
4408 on_failure:
4409 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4410
4411 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4412 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4413
4414 /* If this on_failure_jump comes right before a group (i.e.,
4415 the original * applied to a group), save the information
4416 for that group and all inner ones, so that if we fail back
4417 to this point, the group's information will be correct.
4418 For example, in \(a*\)*\1, we need the preceding group,
4419 and in \(zz\(a*\)b*\)\2, we need the inner group. */
4420
4421 /* We can't use `p' to check ahead because we push
4422 a failure point to `p + mcnt' after we do this. */
4423 p1 = p;
4424
4425 /* We need to skip no_op's before we look for the
4426 start_memory in case this on_failure_jump is happening as
4427 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4428 against aba. */
4429 while (p1 < pend && (re_opcode_t) *p1 == no_op)
4430 p1++;
4431
4432 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4433 {
4434 /* We have a new highest active register now. This will
4435 get reset at the start_memory we are about to get to,
4436 but we will have saved all the registers relevant to
4437 this repetition op, as described above. */
4438 highest_active_reg = *(p1 + 1) + *(p1 + 2);
4439 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4440 lowest_active_reg = *(p1 + 1);
4441 }
4442
4443 DEBUG_PRINT1 (":\n");
4444 PUSH_FAILURE_POINT (p + mcnt, d, -2);
4445 break;
4446
4447
4448 /* A smart repeat ends with `maybe_pop_jump'.
4449 We change it to either `pop_failure_jump' or `jump'. */
4450 case maybe_pop_jump:
4451 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4452 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4453 {
4454 register unsigned char *p2 = p;
4455
4456 /* Compare the beginning of the repeat with what in the
4457 pattern follows its end. If we can establish that there
4458 is nothing that they would both match, i.e., that we
4459 would have to backtrack because of (as in, e.g., `a*a')
4460 then we can change to pop_failure_jump, because we'll
4461 never have to backtrack.
4462
4463 This is not true in the case of alternatives: in
4464 `(a|ab)*' we do need to backtrack to the `ab' alternative
4465 (e.g., if the string was `ab'). But instead of trying to
4466 detect that here, the alternative has put on a dummy
4467 failure point which is what we will end up popping. */
4468
4469 /* Skip over open/close-group commands.
4470 If what follows this loop is a ...+ construct,
4471 look at what begins its body, since we will have to
4472 match at least one of that. */
4473 while (1)
4474 {
4475 if (p2 + 2 < pend
4476 && ((re_opcode_t) *p2 == stop_memory
4477 || (re_opcode_t) *p2 == start_memory))
4478 p2 += 3;
4479 else if (p2 + 6 < pend
4480 && (re_opcode_t) *p2 == dummy_failure_jump)
4481 p2 += 6;
4482 else
4483 break;
4484 }
4485
4486 p1 = p + mcnt;
4487 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4488 to the `maybe_finalize_jump' of this case. Examine what
4489 follows. */
4490
4491 /* If we're at the end of the pattern, we can change. */
4492 if (p2 == pend)
4493 {
4494 /* Consider what happens when matching ":\(.*\)"
4495 against ":/". I don't really understand this code
4496 yet. */
4497 p[-3] = (unsigned char) pop_failure_jump;
4498 DEBUG_PRINT1
4499 (" End of pattern: change to `pop_failure_jump'.\n");
4500 }
4501
4502 else if ((re_opcode_t) *p2 == exactn
4503 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4504 {
4505 register unsigned char c
4506 = *p2 == (unsigned char) endline ? '\n' : p2[2];
4507
4508 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4509 {
4510 p[-3] = (unsigned char) pop_failure_jump;
4511 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
4512 c, p1[5]);
4513 }
4514
4515 else if ((re_opcode_t) p1[3] == charset
4516 || (re_opcode_t) p1[3] == charset_not)
4517 {
4518 int not = (re_opcode_t) p1[3] == charset_not;
4519
4520 if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4521 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4522 not = !not;
4523
4524 /* `not' is equal to 1 if c would match, which means
4525 that we can't change to pop_failure_jump. */
4526 if (!not)
4527 {
4528 p[-3] = (unsigned char) pop_failure_jump;
4529 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
4530 }
4531 }
4532 }
4533 else if ((re_opcode_t) *p2 == charset)
4534 {
4535#ifdef DEBUG
4536 register unsigned char c
4537 = *p2 == (unsigned char) endline ? '\n' : p2[2];
4538#endif
4539
4540 if ((re_opcode_t) p1[3] == exactn
4541 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4542 && (p2[2 + p1[5] / BYTEWIDTH]
4543 & (1 << (p1[5] % BYTEWIDTH)))))
4544 {
4545 p[-3] = (unsigned char) pop_failure_jump;
4546 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
4547 c, p1[5]);
4548 }
4549
4550 else if ((re_opcode_t) p1[3] == charset_not)
4551 {
4552 int idx;
4553 /* We win if the charset_not inside the loop
4554 lists every character listed in the charset after. */
4555 for (idx = 0; idx < (int) p2[1]; idx++)
4556 if (! (p2[2 + idx] == 0
4557 || (idx < (int) p1[4]
4558 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4559 break;
4560
4561 if (idx == p2[1])
4562 {
4563 p[-3] = (unsigned char) pop_failure_jump;
4564 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
4565 }
4566 }
4567 else if ((re_opcode_t) p1[3] == charset)
4568 {
4569 int idx;
4570 /* We win if the charset inside the loop
4571 has no overlap with the one after the loop. */
4572 for (idx = 0;
4573 idx < (int) p2[1] && idx < (int) p1[4];
4574 idx++)
4575 if ((p2[2 + idx] & p1[5 + idx]) != 0)
4576 break;
4577
4578 if (idx == p2[1] || idx == p1[4])
4579 {
4580 p[-3] = (unsigned char) pop_failure_jump;
4581 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
4582 }
4583 }
4584 }
4585 }
4586 p -= 2; /* Point at relative address again. */
4587 if ((re_opcode_t) p[-1] != pop_failure_jump)
4588 {
4589 p[-1] = (unsigned char) jump;
4590 DEBUG_PRINT1 (" Match => jump.\n");
4591 goto unconditional_jump;
4592 }
4593 /* Note fall through. */
4594
4595
4596 /* The end of a simple repeat has a pop_failure_jump back to
4597 its matching on_failure_jump, where the latter will push a
4598 failure point. The pop_failure_jump takes off failure
4599 points put on by this pop_failure_jump's matching
4600 on_failure_jump; we got through the pattern to here from the
4601 matching on_failure_jump, so didn't fail. */
4602 case pop_failure_jump:
4603 {
4604 /* We need to pass separate storage for the lowest and
4605 highest registers, even though we don't care about the
4606 actual values. Otherwise, we will restore only one
4607 register from the stack, since lowest will == highest in
4608 `pop_failure_point'. */
4609 unsigned dummy_low_reg, dummy_high_reg;
4610 unsigned char *pdummy;
4611 const char *sdummy;
4612
4613 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4614 POP_FAILURE_POINT (sdummy, pdummy,
4615 dummy_low_reg, dummy_high_reg,
4616 reg_dummy, reg_dummy, reg_info_dummy);
4617 }
4618 /* Note fall through. */
4619
4620
4621 /* Unconditionally jump (without popping any failure points). */
4622 case jump:
4623 unconditional_jump:
4624 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
4625 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4626 p += mcnt; /* Do the jump. */
4627 DEBUG_PRINT2 ("(to 0x%x).\n", p);
4628 break;
4629
4630
4631 /* We need this opcode so we can detect where alternatives end
4632 in `group_match_null_string_p' et al. */
4633 case jump_past_alt:
4634 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4635 goto unconditional_jump;
4636
4637
4638 /* Normally, the on_failure_jump pushes a failure point, which
4639 then gets popped at pop_failure_jump. We will end up at
4640 pop_failure_jump, also, and with a pattern of, say, `a+', we
4641 are skipping over the on_failure_jump, so we have to push
4642 something meaningless for pop_failure_jump to pop. */
4643 case dummy_failure_jump:
4644 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4645 /* It doesn't matter what we push for the string here. What
4646 the code at `fail' tests is the value for the pattern. */
4647 PUSH_FAILURE_POINT (0, 0, -2);
4648 goto unconditional_jump;
4649
4650
4651 /* At the end of an alternative, we need to push a dummy failure
4652 point in case we are followed by a `pop_failure_jump', because
4653 we don't want the failure point for the alternative to be
4654 popped. For example, matching `(a|ab)*' against `aab'
4655 requires that we match the `ab' alternative. */
4656 case push_dummy_failure:
4657 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4658 /* See comments just above at `dummy_failure_jump' about the
4659 two zeroes. */
4660 PUSH_FAILURE_POINT (0, 0, -2);
4661 break;
4662
4663 /* Have to succeed matching what follows at least n times.
4664 After that, handle like `on_failure_jump'. */
4665 case succeed_n:
4666 EXTRACT_NUMBER (mcnt, p + 2);
4667 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4668
4669 assert (mcnt >= 0);
4670 /* Originally, this is how many times we HAVE to succeed. */
4671 if (mcnt > 0)
4672 {
4673 mcnt--;
4674 p += 2;
4675 STORE_NUMBER_AND_INCR (p, mcnt);
4676 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
4677 }
4678 else if (mcnt == 0)
4679 {
4680 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
4681 p[2] = (unsigned char) no_op;
4682 p[3] = (unsigned char) no_op;
4683 goto on_failure;
4684 }
4685 break;
4686
4687 case jump_n:
4688 EXTRACT_NUMBER (mcnt, p + 2);
4689 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4690
4691 /* Originally, this is how many times we CAN jump. */
4692 if (mcnt)
4693 {
4694 mcnt--;
4695 STORE_NUMBER (p + 2, mcnt);
4696 goto unconditional_jump;
4697 }
4698 /* If don't have to jump any more, skip over the rest of command. */
4699 else
4700 p += 4;
4701 break;
4702
4703 case set_number_at:
4704 {
4705 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4706
4707 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4708 p1 = p + mcnt;
4709 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4710 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
4711 STORE_NUMBER (p1, mcnt);
4712 break;
4713 }
4714
4715#if 0
4716 /* The DEC Alpha C compiler 3.x generates incorrect code for the
4717 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
4718 AT_WORD_BOUNDARY, so this code is disabled. Expanding the
4719 macro and introducing temporary variables works around the bug. */
4720
4721 case wordbound:
4722 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4723 if (AT_WORD_BOUNDARY (d))
4724 break;
4725 goto fail;
4726
4727 case notwordbound:
4728 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4729 if (AT_WORD_BOUNDARY (d))
4730 goto fail;
4731 break;
4732#else
4733 case wordbound:
4734 {
4735 boolean prevchar, thischar;
4736
4737 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4738 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4739 break;
4740
4741 prevchar = WORDCHAR_P (d - 1);
4742 thischar = WORDCHAR_P (d);
4743 if (prevchar != thischar)
4744 break;
4745 goto fail;
4746 }
4747
4748 case notwordbound:
4749 {
4750 boolean prevchar, thischar;
4751
4752 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4753 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4754 goto fail;
4755
4756 prevchar = WORDCHAR_P (d - 1);
4757 thischar = WORDCHAR_P (d);
4758 if (prevchar != thischar)
4759 goto fail;
4760 break;
4761 }
4762#endif
4763
4764 case wordbeg:
4765 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4766 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4767 break;
4768 goto fail;
4769
4770 case wordend:
4771 DEBUG_PRINT1 ("EXECUTING wordend.\n");
4772 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4773 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4774 break;
4775 goto fail;
4776
4777#ifdef emacs
4778 case before_dot:
4779 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4780 if (PTR_CHAR_POS ((unsigned char *) d) >= PT)
4781 goto fail;
4782 break;
4783
4784 case at_dot:
4785 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4786 if (PTR_CHAR_POS ((unsigned char *) d) != PT)
4787 goto fail;
4788 break;
4789
4790 case after_dot:
4791 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4792 if (PTR_CHAR_POS ((unsigned char *) d) <= PT)
4793 goto fail;
4794 break;
4795
4796 case syntaxspec:
4797 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4798 mcnt = *p++;
4799 goto matchsyntax;
4800
4801 case wordchar:
4802 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4803 mcnt = (int) Sword;
4804 matchsyntax:
4805 PREFETCH ();
4806 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
4807 d++;
4808 if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
4809 goto fail;
4810 SET_REGS_MATCHED ();
4811 break;
4812
4813 case notsyntaxspec:
4814 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4815 mcnt = *p++;
4816 goto matchnotsyntax;
4817
4818 case notwordchar:
4819 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4820 mcnt = (int) Sword;
4821 matchnotsyntax:
4822 PREFETCH ();
4823 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
4824 d++;
4825 if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
4826 goto fail;
4827 SET_REGS_MATCHED ();
4828 break;
4829
4830#else /* not emacs */
4831 case wordchar:
4832 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4833 PREFETCH ();
4834 if (!WORDCHAR_P (d))
4835 goto fail;
4836 SET_REGS_MATCHED ();
4837 d++;
4838 break;
4839
4840 case notwordchar:
4841 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4842 PREFETCH ();
4843 if (WORDCHAR_P (d))
4844 goto fail;
4845 SET_REGS_MATCHED ();
4846 d++;
4847 break;
4848#endif /* not emacs */
4849
4850 default:
4851 abort ();
4852 }
4853 continue; /* Successfully executed one pattern command; keep going. */
4854
4855
4856 /* We goto here if a matching operation fails. */
4857 fail:
4858 if (!FAIL_STACK_EMPTY ())
4859 { /* A restart point is known. Restore to that state. */
4860 DEBUG_PRINT1 ("\nFAIL:\n");
4861 POP_FAILURE_POINT (d, p,
4862 lowest_active_reg, highest_active_reg,
4863 regstart, regend, reg_info);
4864
4865 /* If this failure point is a dummy, try the next one. */
4866 if (!p)
4867 goto fail;
4868
4869 /* If we failed to the end of the pattern, don't examine *p. */
4870 assert (p <= pend);
4871 if (p < pend)
4872 {
4873 boolean is_a_jump_n = false;
4874
4875 /* If failed to a backwards jump that's part of a repetition
4876 loop, need to pop this failure point and use the next one. */
4877 switch ((re_opcode_t) *p)
4878 {
4879 case jump_n:
4880 is_a_jump_n = true;
4881 case maybe_pop_jump:
4882 case pop_failure_jump:
4883 case jump:
4884 p1 = p + 1;
4885 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4886 p1 += mcnt;
4887
4888 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4889 || (!is_a_jump_n
4890 && (re_opcode_t) *p1 == on_failure_jump))
4891 goto fail;
4892 break;
4893 default:
4894 /* do nothing */ ;
4895 }
4896 }
4897
4898 if (d >= string1 && d <= end1)
4899 dend = end_match_1;
4900 }
4901 else
4902 break; /* Matching at this starting point really fails. */
4903 } /* for (;;) */
4904
4905 if (best_regs_set)
4906 goto restore_best_regs;
4907
4908 FREE_VARIABLES ();
4909
4910 return -1; /* Failure to match. */
4911} /* re_match_2 */
4912
4913/* Subroutine definitions for re_match_2. */
4914
4915
4916/* We are passed P pointing to a register number after a start_memory.
4917
4918 Return true if the pattern up to the corresponding stop_memory can
4919 match the empty string, and false otherwise.
4920
4921 If we find the matching stop_memory, sets P to point to one past its number.
4922 Otherwise, sets P to an undefined byte less than or equal to END.
4923
4924 We don't handle duplicates properly (yet). */
4925
4926static boolean
4927group_match_null_string_p (p, end, reg_info)
4928 unsigned char **p, *end;
4929 register_info_type *reg_info;
4930{
4931 int mcnt;
4932 /* Point to after the args to the start_memory. */
4933 unsigned char *p1 = *p + 2;
4934
4935 while (p1 < end)
4936 {
4937 /* Skip over opcodes that can match nothing, and return true or
4938 false, as appropriate, when we get to one that can't, or to the
4939 matching stop_memory. */
4940
4941 switch ((re_opcode_t) *p1)
4942 {
4943 /* Could be either a loop or a series of alternatives. */
4944 case on_failure_jump:
4945 p1++;
4946 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4947
4948 /* If the next operation is not a jump backwards in the
4949 pattern. */
4950
4951 if (mcnt >= 0)
4952 {
4953 /* Go through the on_failure_jumps of the alternatives,
4954 seeing if any of the alternatives cannot match nothing.
4955 The last alternative starts with only a jump,
4956 whereas the rest start with on_failure_jump and end
4957 with a jump, e.g., here is the pattern for `a|b|c':
4958
4959 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4960 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4961 /exactn/1/c
4962
4963 So, we have to first go through the first (n-1)
4964 alternatives and then deal with the last one separately. */
4965
4966
4967 /* Deal with the first (n-1) alternatives, which start
4968 with an on_failure_jump (see above) that jumps to right
4969 past a jump_past_alt. */
4970
4971 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4972 {
4973 /* `mcnt' holds how many bytes long the alternative
4974 is, including the ending `jump_past_alt' and
4975 its number. */
4976
4977 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4978 reg_info))
4979 return false;
4980
4981 /* Move to right after this alternative, including the
4982 jump_past_alt. */
4983 p1 += mcnt;
4984
4985 /* Break if it's the beginning of an n-th alternative
4986 that doesn't begin with an on_failure_jump. */
4987 if ((re_opcode_t) *p1 != on_failure_jump)
4988 break;
4989
4990 /* Still have to check that it's not an n-th
4991 alternative that starts with an on_failure_jump. */
4992 p1++;
4993 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4994 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4995 {
4996 /* Get to the beginning of the n-th alternative. */
4997 p1 -= 3;
4998 break;
4999 }
5000 }
5001
5002 /* Deal with the last alternative: go back and get number
5003 of the `jump_past_alt' just before it. `mcnt' contains
5004 the length of the alternative. */
5005 EXTRACT_NUMBER (mcnt, p1 - 2);
5006
5007 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5008 return false;
5009
5010 p1 += mcnt; /* Get past the n-th alternative. */
5011 } /* if mcnt > 0 */
5012 break;
5013
5014
5015 case stop_memory:
5016 assert (p1[1] == **p);
5017 *p = p1 + 2;
5018 return true;
5019
5020
5021 default:
5022 if (!common_op_match_null_string_p (&p1, end, reg_info))
5023 return false;
5024 }
5025 } /* while p1 < end */
5026
5027 return false;
5028} /* group_match_null_string_p */
5029
5030
5031/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5032 It expects P to be the first byte of a single alternative and END one
5033 byte past the last. The alternative can contain groups. */
5034
5035static boolean
5036alt_match_null_string_p (p, end, reg_info)
5037 unsigned char *p, *end;
5038 register_info_type *reg_info;
5039{
5040 int mcnt;
5041 unsigned char *p1 = p;
5042
5043 while (p1 < end)
5044 {
5045 /* Skip over opcodes that can match nothing, and break when we get
5046 to one that can't. */
5047
5048 switch ((re_opcode_t) *p1)
5049 {
5050 /* It's a loop. */
5051 case on_failure_jump:
5052 p1++;
5053 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5054 p1 += mcnt;
5055 break;
5056
5057 default:
5058 if (!common_op_match_null_string_p (&p1, end, reg_info))
5059 return false;
5060 }
5061 } /* while p1 < end */
5062
5063 return true;
5064} /* alt_match_null_string_p */
5065
5066
5067/* Deals with the ops common to group_match_null_string_p and
5068 alt_match_null_string_p.
5069
5070 Sets P to one after the op and its arguments, if any. */
5071
5072static boolean
5073common_op_match_null_string_p (p, end, reg_info)
5074 unsigned char **p, *end;
5075 register_info_type *reg_info;
5076{
5077 int mcnt;
5078 boolean ret;
5079 int reg_no;
5080 unsigned char *p1 = *p;
5081
5082 switch ((re_opcode_t) *p1++)
5083 {
5084 case no_op:
5085 case begline:
5086 case endline:
5087 case begbuf:
5088 case endbuf:
5089 case wordbeg:
5090 case wordend:
5091 case wordbound:
5092 case notwordbound:
5093#ifdef emacs
5094 case before_dot:
5095 case at_dot:
5096 case after_dot:
5097#endif
5098 break;
5099
5100 case start_memory:
5101 reg_no = *p1;
5102 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5103 ret = group_match_null_string_p (&p1, end, reg_info);
5104
5105 /* Have to set this here in case we're checking a group which
5106 contains a group and a back reference to it. */
5107
5108 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5109 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5110
5111 if (!ret)
5112 return false;
5113 break;
5114
5115 /* If this is an optimized succeed_n for zero times, make the jump. */
5116 case jump:
5117 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5118 if (mcnt >= 0)
5119 p1 += mcnt;
5120 else
5121 return false;
5122 break;
5123
5124 case succeed_n:
5125 /* Get to the number of times to succeed. */
5126 p1 += 2;
5127 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5128
5129 if (mcnt == 0)
5130 {
5131 p1 -= 4;
5132 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5133 p1 += mcnt;
5134 }
5135 else
5136 return false;
5137 break;
5138
5139 case duplicate:
5140 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5141 return false;
5142 break;
5143
5144 case set_number_at:
5145 p1 += 4;
5146
5147 default:
5148 /* All other opcodes mean we cannot match the empty string. */
5149 return false;
5150 }
5151
5152 *p = p1;
5153 return true;
5154} /* common_op_match_null_string_p */
5155
5156
5157/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5158 bytes; nonzero otherwise. */
5159
5160static int
5161bcmp_translate (s1, s2, len, translate)
5162 unsigned char *s1, *s2;
5163 register int len;
5164 RE_TRANSLATE_TYPE translate;
5165{
5166 register unsigned char *p1 = s1, *p2 = s2;
5167 while (len)
5168 {
5169 if (translate[*p1++] != translate[*p2++]) return 1;
5170 len--;
5171 }
5172 return 0;
5173}
5174
5175/* Entry points for GNU code. */
5176
5177/* re_compile_pattern is the GNU regular expression compiler: it
5178 compiles PATTERN (of length SIZE) and puts the result in BUFP.
5179 Returns 0 if the pattern was valid, otherwise an error string.
5180
5181 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5182 are set in BUFP on entry.
5183
5184 We call regex_compile to do the actual compilation. */
5185
5186const char *
5187re_compile_pattern (pattern, length, bufp)
5188 const char *pattern;
5189 int length;
5190 struct re_pattern_buffer *bufp;
5191{
5192 reg_errcode_t ret;
5193
5194 /* GNU code is written to assume at least RE_NREGS registers will be set
5195 (and at least one extra will be -1). */
5196 bufp->regs_allocated = REGS_UNALLOCATED;
5197
5198 /* And GNU code determines whether or not to get register information
5199 by passing null for the REGS argument to re_match, etc., not by
5200 setting no_sub. */
5201 bufp->no_sub = 0;
5202
5203 /* Match anchors at newline. */
5204 bufp->newline_anchor = 1;
5205
5206 ret = regex_compile (pattern, length, re_syntax_options, bufp);
5207
5208 if (!ret)
5209 return NULL;
5210 return gettext (re_error_msgid[(int) ret]);
5211}
5212
5213/* Entry points compatible with 4.2 BSD regex library. We don't define
5214 them unless specifically requested. */
5215
5216#if defined (_REGEX_RE_COMP) || defined (_LIBC)
5217
5218/* BSD has one and only one pattern buffer. */
5219static struct re_pattern_buffer re_comp_buf;
5220
5221char *
5222#ifdef _LIBC
5223/* Make these definitions weak in libc, so POSIX programs can redefine
5224 these names if they don't use our functions, and still use
5225 regcomp/regexec below without link errors. */
5226weak_function
5227#endif
5228re_comp (s)
5229 const char *s;
5230{
5231 reg_errcode_t ret;
5232
5233 if (!s)
5234 {
5235 if (!re_comp_buf.buffer)
5236 return gettext ("No previous regular expression");
5237 return 0;
5238 }
5239
5240 if (!re_comp_buf.buffer)
5241 {
5242 re_comp_buf.buffer = (unsigned char *) malloc (200);
5243 if (re_comp_buf.buffer == NULL)
5244 return gettext (re_error_msgid[(int) REG_ESPACE]);
5245 re_comp_buf.allocated = 200;
5246
5247 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5248 if (re_comp_buf.fastmap == NULL)
5249 return gettext (re_error_msgid[(int) REG_ESPACE]);
5250 }
5251
5252 /* Since `re_exec' always passes NULL for the `regs' argument, we
5253 don't need to initialize the pattern buffer fields which affect it. */
5254
5255 /* Match anchors at newlines. */
5256 re_comp_buf.newline_anchor = 1;
5257
5258 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5259
5260 if (!ret)
5261 return NULL;
5262
5263 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
5264 return (char *) gettext (re_error_msgid[(int) ret]);
5265}
5266
5267
5268int
5269#ifdef _LIBC
5270weak_function
5271#endif
5272re_exec (s)
5273 const char *s;
5274{
5275 const int len = strlen (s);
5276 return
5277 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5278}
5279#endif /* _REGEX_RE_COMP */
5280
5281/* POSIX.2 functions. Don't define these for Emacs. */
5282
5283#ifndef emacs
5284
5285/* regcomp takes a regular expression as a string and compiles it.
5286
5287 PREG is a regex_t *. We do not expect any fields to be initialized,
5288 since POSIX says we shouldn't. Thus, we set
5289
5290 `buffer' to the compiled pattern;
5291 `used' to the length of the compiled pattern;
5292 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5293 REG_EXTENDED bit in CFLAGS is set; otherwise, to
5294 RE_SYNTAX_POSIX_BASIC;
5295 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5296 `fastmap' and `fastmap_accurate' to zero;
5297 `re_nsub' to the number of subexpressions in PATTERN.
5298
5299 PATTERN is the address of the pattern string.
5300
5301 CFLAGS is a series of bits which affect compilation.
5302
5303 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5304 use POSIX basic syntax.
5305
5306 If REG_NEWLINE is set, then . and [^...] don't match newline.
5307 Also, regexec will try a match beginning after every newline.
5308
5309 If REG_ICASE is set, then we considers upper- and lowercase
5310 versions of letters to be equivalent when matching.
5311
5312 If REG_NOSUB is set, then when PREG is passed to regexec, that
5313 routine will report only success or failure, and nothing about the
5314 registers.
5315
5316 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
5317 the return codes and their meanings.) */
5318int
5319#ifdef _LIBC
5320weak_function
5321#endif
5322regcomp (preg, pattern, cflags)
5323 regex_t *preg;
5324 const char *pattern;
5325 int cflags;
5326{
5327 reg_errcode_t ret;
5328 unsigned syntax
5329 = (cflags & REG_EXTENDED) ?
5330 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5331
5332 /* regex_compile will allocate the space for the compiled pattern. */
5333 preg->buffer = 0;
5334 preg->allocated = 0;
5335 preg->used = 0;
5336
5337 /* Don't bother to use a fastmap when searching. This simplifies the
5338 REG_NEWLINE case: if we used a fastmap, we'd have to put all the
5339 characters after newlines into the fastmap. This way, we just try
5340 every character. */
5341 preg->fastmap = 0;
5342
5343 if (cflags & REG_ICASE)
5344 {
5345 unsigned i;
5346
5347 preg->translate
5348 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5349 * sizeof (*(RE_TRANSLATE_TYPE)0));
5350 if (preg->translate == NULL)
5351 return (int) REG_ESPACE;
5352
5353 /* Map uppercase characters to corresponding lowercase ones. */
5354 for (i = 0; i < CHAR_SET_SIZE; i++)
5355 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5356 }
5357 else
5358 preg->translate = NULL;
5359
5360 /* If REG_NEWLINE is set, newlines are treated differently. */
5361 if (cflags & REG_NEWLINE)
5362 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
5363 syntax &= ~RE_DOT_NEWLINE;
5364 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5365 /* It also changes the matching behavior. */
5366 preg->newline_anchor = 1;
5367 }
5368 else
5369 preg->newline_anchor = 0;
5370
5371 preg->no_sub = !!(cflags & REG_NOSUB);
5372
5373 /* POSIX says a null character in the pattern terminates it, so we
5374 can use strlen here in compiling the pattern. */
5375 ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5376
5377 /* POSIX doesn't distinguish between an unmatched open-group and an
5378 unmatched close-group: both are REG_EPAREN. */
5379 if (ret == REG_ERPAREN) ret = REG_EPAREN;
5380
5381 return (int) ret;
5382}
5383
5384
5385/* regexec searches for a given pattern, specified by PREG, in the
5386 string STRING.
5387
5388 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5389 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
5390 least NMATCH elements, and we set them to the offsets of the
5391 corresponding matched substrings.
5392
5393 EFLAGS specifies `execution flags' which affect matching: if
5394 REG_NOTBOL is set, then ^ does not match at the beginning of the
5395 string; if REG_NOTEOL is set, then $ does not match at the end.
5396
5397 We return 0 if we find a match and REG_NOMATCH if not. */
5398int
5399#ifdef _LIBC
5400weak_function
5401#endif
5402regexec (preg, string, nmatch, pmatch, eflags)
5403 const regex_t *preg;
5404 const char *string;
5405 size_t nmatch;
5406 regmatch_t pmatch[];
5407 int eflags;
5408{
5409 int ret;
5410 struct re_registers regs;
5411 regex_t private_preg;
5412 int len = strlen (string);
5413 boolean want_reg_info = !preg->no_sub && nmatch > 0;
5414
5415 private_preg = *preg;
5416
5417 private_preg.not_bol = !!(eflags & REG_NOTBOL);
5418 private_preg.not_eol = !!(eflags & REG_NOTEOL);
5419
5420 /* The user has told us exactly how many registers to return
5421 information about, via `nmatch'. We have to pass that on to the
5422 matching routines. */
5423 private_preg.regs_allocated = REGS_FIXED;
5424
5425 if (want_reg_info)
5426 {
5427 regs.num_regs = nmatch;
5428 regs.start = TALLOC (nmatch, regoff_t);
5429 regs.end = TALLOC (nmatch, regoff_t);
5430 if (regs.start == NULL || regs.end == NULL)
5431 return (int) REG_NOMATCH;
5432 }
5433
5434 /* Perform the searching operation. */
5435 ret = re_search (&private_preg, string, len,
5436 /* start: */ 0, /* range: */ len,
5437 want_reg_info ? &regs : (struct re_registers *) 0);
5438
5439 /* Copy the register information to the POSIX structure. */
5440 if (want_reg_info)
5441 {
5442 if (ret >= 0)
5443 {
5444 unsigned r;
5445
5446 for (r = 0; r < nmatch; r++)
5447 {
5448 pmatch[r].rm_so = regs.start[r];
5449 pmatch[r].rm_eo = regs.end[r];
5450 }
5451 }
5452
5453 /* If we needed the temporary register info, free the space now. */
5454 free (regs.start);
5455 free (regs.end);
5456 }
5457
5458 /* We want zero return to mean success, unlike `re_search'. */
5459 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5460}
5461
5462
5463/* Returns a message corresponding to an error code, ERRCODE, returned
5464 from either regcomp or regexec. We don't use PREG here. */
5465size_t
5466#ifdef _LIBC
5467/* Make these definitions weak in libc, so POSIX programs can redefine
5468 these names if they don't use our functions, and still use
5469 regcomp/regexec below without link errors. */
5470weak_function
5471#endif
5472regerror (errcode, preg, errbuf, errbuf_size)
5473 int errcode;
5474 const regex_t *preg;
5475 char *errbuf;
5476 size_t errbuf_size;
5477{
5478 const char *msg;
5479 size_t msg_size;
5480
5481 if (errcode < 0
5482 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
5483 /* Only error codes returned by the rest of the code should be passed
5484 to this routine. If we are given anything else, or if other regex
5485 code generates an invalid error code, then the program has a bug.
5486 Dump core so we can fix it. */
5487 abort ();
5488
5489 msg = gettext (re_error_msgid[errcode]);
5490
5491 msg_size = strlen (msg) + 1; /* Includes the null. */
5492
5493 if (errbuf_size != 0)
5494 {
5495 if (msg_size > errbuf_size)
5496 {
5497 strncpy (errbuf, msg, errbuf_size - 1);
5498 errbuf[errbuf_size - 1] = 0;
5499 }
5500 else
5501 strcpy (errbuf, msg);
5502 }
5503
5504 return msg_size;
5505}
5506
5507
5508/* Free dynamically allocated space used by PREG. */
5509
5510void
5511#ifdef _LIBC
5512/* Make these definitions weak in libc, so POSIX programs can redefine
5513 these names if they don't use our functions, and still use
5514 regcomp/regexec below without link errors. */
5515weak_function
5516#endif
5517regfree (preg)
5518 regex_t *preg;
5519{
5520 if (preg->buffer != NULL)
5521 free (preg->buffer);
5522 preg->buffer = NULL;
5523
5524 preg->allocated = 0;
5525 preg->used = 0;
5526
5527 if (preg->fastmap != NULL)
5528 free (preg->fastmap);
5529 preg->fastmap = NULL;
5530 preg->fastmap_accurate = 0;
5531
5532 if (preg->translate != NULL)
5533 free (preg->translate);
5534 preg->translate = NULL;
5535}
5536
5537#endif /* not emacs */