blob: 70baa1394f57e05df39226e4073a7a27a5125db7 [file] [log] [blame]
Janis Danisevskis112c9cc2016-03-31 13:35:25 +01001/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
Elliott Hughes653c2102019-01-09 15:41:36 -080010 New API code Copyright (c) 2016-2018 University of Cambridge
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010011
12-----------------------------------------------------------------------------
13Redistribution and use in source and binary forms, with or without
14modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37POSSIBILITY OF SUCH DAMAGE.
38-----------------------------------------------------------------------------
39*/
40
41
/* This module contains a single function that scans through a compiled pattern
until it finds a capturing bracket with the given number, or, if the number is
negative, an instance of OP_REVERSE for a lookbehind. The function is called
from pcre2_compile.c and also from pcre2_study.c when finding the minimum
matching length. */
47
48
49#ifdef HAVE_CONFIG_H
50#include "config.h"
51#endif
52
53#include "pcre2_internal.h"
54
55
56/*************************************************
57* Scan compiled regex for specific bracket *
58*************************************************/
59
60/*
61Arguments:
62 code points to start of expression
63 utf TRUE in UTF mode
64 number the required bracket number or negative to find a lookbehind
65
66Returns: pointer to the opcode for the bracket, or NULL if not found
67*/
68
69PCRE2_SPTR
70PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
71{
72for (;;)
73 {
Elliott Hughes9bc971b2018-07-27 13:23:14 -070074 PCRE2_UCHAR c = *code;
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010075
76 if (c == OP_END) return NULL;
77
78 /* XCLASS is used for classes that cannot be represented just by a bit map.
79 This includes negated single high-valued characters. CALLOUT_STR is used for
80 callouts with string arguments. In both cases the length in the table is
81 zero; the actual length is stored in the compiled code. */
82
83 if (c == OP_XCLASS) code += GET(code, 1);
84 else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
85
86 /* Handle lookbehind */
87
88 else if (c == OP_REVERSE)
89 {
90 if (number < 0) return (PCRE2_UCHAR *)code;
91 code += PRIV(OP_lengths)[c];
92 }
93
94 /* Handle capturing bracket */
95
96 else if (c == OP_CBRA || c == OP_SCBRA ||
97 c == OP_CBRAPOS || c == OP_SCBRAPOS)
98 {
99 int n = (int)GET2(code, 1+LINK_SIZE);
100 if (n == number) return (PCRE2_UCHAR *)code;
101 code += PRIV(OP_lengths)[c];
102 }
103
104 /* Otherwise, we can get the item's length from the table, except that for
105 repeated character types, we have to test for \p and \P, which have an extra
106 two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
107 must add in its length. */
108
109 else
110 {
111 switch(c)
112 {
113 case OP_TYPESTAR:
114 case OP_TYPEMINSTAR:
115 case OP_TYPEPLUS:
116 case OP_TYPEMINPLUS:
117 case OP_TYPEQUERY:
118 case OP_TYPEMINQUERY:
119 case OP_TYPEPOSSTAR:
120 case OP_TYPEPOSPLUS:
121 case OP_TYPEPOSQUERY:
122 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
123 break;
124
125 case OP_TYPEUPTO:
126 case OP_TYPEMINUPTO:
127 case OP_TYPEEXACT:
128 case OP_TYPEPOSUPTO:
129 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
130 code += 2;
131 break;
132
133 case OP_MARK:
Elliott Hughes653c2102019-01-09 15:41:36 -0800134 case OP_COMMIT_ARG:
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100135 case OP_PRUNE_ARG:
136 case OP_SKIP_ARG:
137 case OP_THEN_ARG:
138 code += code[1];
139 break;
140 }
141
142 /* Add in the fixed length from the table */
143
144 code += PRIV(OP_lengths)[c];
145
146 /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
147 followed by a multi-byte character. The length in the table is a minimum, so
148 we have to arrange to skip the extra bytes. */
149
150#ifdef MAYBE_UTF_MULTI
151 if (utf) switch(c)
152 {
153 case OP_CHAR:
154 case OP_CHARI:
155 case OP_NOT:
156 case OP_NOTI:
157 case OP_EXACT:
158 case OP_EXACTI:
159 case OP_NOTEXACT:
160 case OP_NOTEXACTI:
161 case OP_UPTO:
162 case OP_UPTOI:
163 case OP_NOTUPTO:
164 case OP_NOTUPTOI:
165 case OP_MINUPTO:
166 case OP_MINUPTOI:
167 case OP_NOTMINUPTO:
168 case OP_NOTMINUPTOI:
169 case OP_POSUPTO:
170 case OP_POSUPTOI:
171 case OP_NOTPOSUPTO:
172 case OP_NOTPOSUPTOI:
173 case OP_STAR:
174 case OP_STARI:
175 case OP_NOTSTAR:
176 case OP_NOTSTARI:
177 case OP_MINSTAR:
178 case OP_MINSTARI:
179 case OP_NOTMINSTAR:
180 case OP_NOTMINSTARI:
181 case OP_POSSTAR:
182 case OP_POSSTARI:
183 case OP_NOTPOSSTAR:
184 case OP_NOTPOSSTARI:
185 case OP_PLUS:
186 case OP_PLUSI:
187 case OP_NOTPLUS:
188 case OP_NOTPLUSI:
189 case OP_MINPLUS:
190 case OP_MINPLUSI:
191 case OP_NOTMINPLUS:
192 case OP_NOTMINPLUSI:
193 case OP_POSPLUS:
194 case OP_POSPLUSI:
195 case OP_NOTPOSPLUS:
196 case OP_NOTPOSPLUSI:
197 case OP_QUERY:
198 case OP_QUERYI:
199 case OP_NOTQUERY:
200 case OP_NOTQUERYI:
201 case OP_MINQUERY:
202 case OP_MINQUERYI:
203 case OP_NOTMINQUERY:
204 case OP_NOTMINQUERYI:
205 case OP_POSQUERY:
206 case OP_POSQUERYI:
207 case OP_NOTPOSQUERY:
208 case OP_NOTPOSQUERYI:
209 if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
210 break;
211 }
212#else
213 (void)(utf); /* Keep compiler happy by referencing function argument */
214#endif /* MAYBE_UTF_MULTI */
215 }
216 }
217}
218
/* End of pcre2_find_bracket.c */