blob: a29f5eff673bed667eaaf5255f6de7554383294c [file] [log] [blame]
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2018 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
40
41
42#ifdef HAVE_CONFIG_H
43#include "config.h"
44#endif
45
46#include "pcre2_internal.h"
47
48
49/*************************************************
50* Return info about compiled pattern *
51*************************************************/
52
53/*
54Arguments:
55 code points to compiled code
56 what what information is required
57 where where to put the information; if NULL, return length
58
59Returns: 0 when data returned
60 > 0 when length requested
61 < 0 on error or unset value
62*/
63
64PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
65pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
66{
67const pcre2_real_code *re = (pcre2_real_code *)code;
68
69if (where == NULL) /* Requests field length */
70 {
71 switch(what)
72 {
73 case PCRE2_INFO_ALLOPTIONS:
74 case PCRE2_INFO_ARGOPTIONS:
75 case PCRE2_INFO_BACKREFMAX:
76 case PCRE2_INFO_BSR:
77 case PCRE2_INFO_CAPTURECOUNT:
Elliott Hughes9bc971b2018-07-27 13:23:14 -070078 case PCRE2_INFO_DEPTHLIMIT:
79 case PCRE2_INFO_EXTRAOPTIONS:
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010080 case PCRE2_INFO_FIRSTCODETYPE:
81 case PCRE2_INFO_FIRSTCODEUNIT:
82 case PCRE2_INFO_HASBACKSLASHC:
83 case PCRE2_INFO_HASCRORLF:
Elliott Hughes9bc971b2018-07-27 13:23:14 -070084 case PCRE2_INFO_HEAPLIMIT:
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010085 case PCRE2_INFO_JCHANGED:
86 case PCRE2_INFO_LASTCODETYPE:
87 case PCRE2_INFO_LASTCODEUNIT:
88 case PCRE2_INFO_MATCHEMPTY:
89 case PCRE2_INFO_MATCHLIMIT:
90 case PCRE2_INFO_MAXLOOKBEHIND:
91 case PCRE2_INFO_MINLENGTH:
92 case PCRE2_INFO_NAMEENTRYSIZE:
93 case PCRE2_INFO_NAMECOUNT:
94 case PCRE2_INFO_NEWLINE:
Janis Danisevskis112c9cc2016-03-31 13:35:25 +010095 return sizeof(uint32_t);
96
97 case PCRE2_INFO_FIRSTBITMAP:
98 return sizeof(const uint8_t *);
99
100 case PCRE2_INFO_JITSIZE:
101 case PCRE2_INFO_SIZE:
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700102 case PCRE2_INFO_FRAMESIZE:
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100103 return sizeof(size_t);
104
105 case PCRE2_INFO_NAMETABLE:
106 return sizeof(PCRE2_SPTR);
107 }
108 }
109
110if (re == NULL) return PCRE2_ERROR_NULL;
111
112/* Check that the first field in the block is the magic number. If it is not,
113return with PCRE2_ERROR_BADMAGIC. */
114
115if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
116
117/* Check that this pattern was compiled in the correct bit mode */
118
119if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
120
121switch(what)
122 {
123 case PCRE2_INFO_ALLOPTIONS:
124 *((uint32_t *)where) = re->overall_options;
125 break;
126
127 case PCRE2_INFO_ARGOPTIONS:
128 *((uint32_t *)where) = re->compile_options;
129 break;
130
131 case PCRE2_INFO_BACKREFMAX:
132 *((uint32_t *)where) = re->top_backref;
133 break;
134
135 case PCRE2_INFO_BSR:
136 *((uint32_t *)where) = re->bsr_convention;
137 break;
138
139 case PCRE2_INFO_CAPTURECOUNT:
140 *((uint32_t *)where) = re->top_bracket;
141 break;
142
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700143 case PCRE2_INFO_DEPTHLIMIT:
144 *((uint32_t *)where) = re->limit_depth;
145 if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
146 break;
147
148 case PCRE2_INFO_EXTRAOPTIONS:
149 *((uint32_t *)where) = re->extra_options;
150 break;
151
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100152 case PCRE2_INFO_FIRSTCODETYPE:
153 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
154 ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
155 break;
156
157 case PCRE2_INFO_FIRSTCODEUNIT:
158 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
159 re->first_codeunit : 0;
160 break;
161
162 case PCRE2_INFO_FIRSTBITMAP:
163 *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
164 &(re->start_bitmap[0]) : NULL;
165 break;
166
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700167 case PCRE2_INFO_FRAMESIZE:
168 *((size_t *)where) = offsetof(heapframe, ovector) +
169 re->top_bracket * 2 * sizeof(PCRE2_SIZE);
170 break;
171
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100172 case PCRE2_INFO_HASBACKSLASHC:
173 *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
174 break;
175
176 case PCRE2_INFO_HASCRORLF:
177 *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
178 break;
179
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700180 case PCRE2_INFO_HEAPLIMIT:
181 *((uint32_t *)where) = re->limit_heap;
182 if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
183 break;
184
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100185 case PCRE2_INFO_JCHANGED:
186 *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
187 break;
188
189 case PCRE2_INFO_JITSIZE:
190#ifdef SUPPORT_JIT
191 *((size_t *)where) = (re->executable_jit != NULL)?
192 PRIV(jit_get_size)(re->executable_jit) : 0;
193#else
194 *((size_t *)where) = 0;
195#endif
196 break;
197
198 case PCRE2_INFO_LASTCODETYPE:
199 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
200 break;
201
202 case PCRE2_INFO_LASTCODEUNIT:
203 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
204 re->last_codeunit : 0;
205 break;
206
207 case PCRE2_INFO_MATCHEMPTY:
208 *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
209 break;
210
211 case PCRE2_INFO_MATCHLIMIT:
212 *((uint32_t *)where) = re->limit_match;
213 if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
214 break;
215
216 case PCRE2_INFO_MAXLOOKBEHIND:
217 *((uint32_t *)where) = re->max_lookbehind;
218 break;
219
220 case PCRE2_INFO_MINLENGTH:
221 *((uint32_t *)where) = re->minlength;
222 break;
223
224 case PCRE2_INFO_NAMEENTRYSIZE:
225 *((uint32_t *)where) = re->name_entry_size;
226 break;
227
228 case PCRE2_INFO_NAMECOUNT:
229 *((uint32_t *)where) = re->name_count;
230 break;
231
232 case PCRE2_INFO_NAMETABLE:
233 *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
234 break;
235
236 case PCRE2_INFO_NEWLINE:
237 *((uint32_t *)where) = re->newline_convention;
238 break;
239
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100240 case PCRE2_INFO_SIZE:
241 *((size_t *)where) = re->blocksize;
242 break;
243
244 default: return PCRE2_ERROR_BADOPTION;
245 }
246
247return 0;
248}
249
250
251
252/*************************************************
253* Callout enumerator *
254*************************************************/
255
256/*
257Arguments:
258 code points to compiled code
259 callback function called for each callout block
260 callout_data user data passed to the callback
261
262Returns: 0 when successfully completed
263 < 0 on local error
264 != 0 for callback error
265*/
266
267PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
268pcre2_callout_enumerate(const pcre2_code *code,
269 int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
270{
271pcre2_real_code *re = (pcre2_real_code *)code;
272pcre2_callout_enumerate_block cb;
273PCRE2_SPTR cc;
274#ifdef SUPPORT_UNICODE
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700275BOOL utf;
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100276#endif
277
278if (re == NULL) return PCRE2_ERROR_NULL;
279
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700280#ifdef SUPPORT_UNICODE
281utf = (re->overall_options & PCRE2_UTF) != 0;
282#endif
283
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100284/* Check that the first field in the block is the magic number. If it is not,
285return with PCRE2_ERROR_BADMAGIC. */
286
287if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
288
289/* Check that this pattern was compiled in the correct bit mode */
290
291if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
292
293cb.version = 0;
294cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
295 + re->name_count * re->name_entry_size;
296
297while (TRUE)
298 {
299 int rc;
300 switch (*cc)
301 {
302 case OP_END:
303 return 0;
304
305 case OP_CHAR:
306 case OP_CHARI:
307 case OP_NOT:
308 case OP_NOTI:
309 case OP_STAR:
310 case OP_MINSTAR:
311 case OP_PLUS:
312 case OP_MINPLUS:
313 case OP_QUERY:
314 case OP_MINQUERY:
315 case OP_UPTO:
316 case OP_MINUPTO:
317 case OP_EXACT:
318 case OP_POSSTAR:
319 case OP_POSPLUS:
320 case OP_POSQUERY:
321 case OP_POSUPTO:
322 case OP_STARI:
323 case OP_MINSTARI:
324 case OP_PLUSI:
325 case OP_MINPLUSI:
326 case OP_QUERYI:
327 case OP_MINQUERYI:
328 case OP_UPTOI:
329 case OP_MINUPTOI:
330 case OP_EXACTI:
331 case OP_POSSTARI:
332 case OP_POSPLUSI:
333 case OP_POSQUERYI:
334 case OP_POSUPTOI:
335 case OP_NOTSTAR:
336 case OP_NOTMINSTAR:
337 case OP_NOTPLUS:
338 case OP_NOTMINPLUS:
339 case OP_NOTQUERY:
340 case OP_NOTMINQUERY:
341 case OP_NOTUPTO:
342 case OP_NOTMINUPTO:
343 case OP_NOTEXACT:
344 case OP_NOTPOSSTAR:
345 case OP_NOTPOSPLUS:
346 case OP_NOTPOSQUERY:
347 case OP_NOTPOSUPTO:
348 case OP_NOTSTARI:
349 case OP_NOTMINSTARI:
350 case OP_NOTPLUSI:
351 case OP_NOTMINPLUSI:
352 case OP_NOTQUERYI:
353 case OP_NOTMINQUERYI:
354 case OP_NOTUPTOI:
355 case OP_NOTMINUPTOI:
356 case OP_NOTEXACTI:
357 case OP_NOTPOSSTARI:
358 case OP_NOTPOSPLUSI:
359 case OP_NOTPOSQUERYI:
360 case OP_NOTPOSUPTOI:
361 cc += PRIV(OP_lengths)[*cc];
362#ifdef SUPPORT_UNICODE
363 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
364#endif
365 break;
366
367 case OP_TYPESTAR:
368 case OP_TYPEMINSTAR:
369 case OP_TYPEPLUS:
370 case OP_TYPEMINPLUS:
371 case OP_TYPEQUERY:
372 case OP_TYPEMINQUERY:
373 case OP_TYPEUPTO:
374 case OP_TYPEMINUPTO:
375 case OP_TYPEEXACT:
376 case OP_TYPEPOSSTAR:
377 case OP_TYPEPOSPLUS:
378 case OP_TYPEPOSQUERY:
379 case OP_TYPEPOSUPTO:
380 cc += PRIV(OP_lengths)[*cc];
381#ifdef SUPPORT_UNICODE
382 if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
383#endif
384 break;
385
386#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
387 case OP_XCLASS:
388 cc += GET(cc, 1);
389 break;
390#endif
391
392 case OP_MARK:
Elliott Hughes653c2102019-01-09 15:41:36 -0800393 case OP_COMMIT_ARG:
Janis Danisevskis112c9cc2016-03-31 13:35:25 +0100394 case OP_PRUNE_ARG:
395 case OP_SKIP_ARG:
396 case OP_THEN_ARG:
397 cc += PRIV(OP_lengths)[*cc] + cc[1];
398 break;
399
400 case OP_CALLOUT:
401 cb.pattern_position = GET(cc, 1);
402 cb.next_item_length = GET(cc, 1 + LINK_SIZE);
403 cb.callout_number = cc[1 + 2*LINK_SIZE];
404 cb.callout_string_offset = 0;
405 cb.callout_string_length = 0;
406 cb.callout_string = NULL;
407 rc = callback(&cb, callout_data);
408 if (rc != 0) return rc;
409 cc += PRIV(OP_lengths)[*cc];
410 break;
411
412 case OP_CALLOUT_STR:
413 cb.pattern_position = GET(cc, 1);
414 cb.next_item_length = GET(cc, 1 + LINK_SIZE);
415 cb.callout_number = 0;
416 cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
417 cb.callout_string_length =
418 GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
419 cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
420 rc = callback(&cb, callout_data);
421 if (rc != 0) return rc;
422 cc += GET(cc, 1 + 2*LINK_SIZE);
423 break;
424
425 default:
426 cc += PRIV(OP_lengths)[*cc];
427 break;
428 }
429 }
430}
431
432/* End of pcre2_pattern_info.c */