blob: f8a3d25de673c89f44ff4d145061de5e3b67a025 [file] [log] [blame]
Janis Danisevskis53e448c2016-03-31 13:35:25 +01001/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
Elliott Hughes9bc971b2018-07-27 13:23:14 -070010 New API code Copyright (c) 2016-2018 University of Cambridge
Janis Danisevskis53e448c2016-03-31 13:35:25 +010011
12-----------------------------------------------------------------------------
13Redistribution and use in source and binary forms, with or without
14modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37POSSIBILITY OF SUCH DAMAGE.
38-----------------------------------------------------------------------------
39*/
40
41
42/* This module contains mode-dependent macro and structure definitions. The
43file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
44These mode-dependent items are kept in a separate file so that they can also be
45#included multiple times for different code unit widths by pcre2test in order
46to have access to the hidden structures at all supported widths.
47
48Some of the mode-dependent macros are required at different widths for
49different parts of the pcre2test code (in particular, the included
50pcre_printint.c file). We undefine them here so that they can be re-defined for
51multiple inclusions. Not all of these are used in pcre2test, but it's easier
52just to undefine them all. */
53
54#undef ACROSSCHAR
55#undef BACKCHAR
56#undef BYTES2CU
Elliott Hughes9bc971b2018-07-27 13:23:14 -070057#undef CHMAX_255
Janis Danisevskis53e448c2016-03-31 13:35:25 +010058#undef CU2BYTES
59#undef FORWARDCHAR
60#undef FORWARDCHARTEST
61#undef GET
62#undef GET2
63#undef GETCHAR
64#undef GETCHARINC
65#undef GETCHARINCTEST
66#undef GETCHARLEN
67#undef GETCHARLENTEST
68#undef GETCHARTEST
69#undef GET_EXTRALEN
70#undef HAS_EXTRALEN
71#undef IMM2_SIZE
72#undef MAX_255
73#undef MAX_MARK
74#undef MAX_PATTERN_SIZE
75#undef MAX_UTF_SINGLE_CU
76#undef NOT_FIRSTCU
77#undef PUT
78#undef PUT2
79#undef PUT2INC
80#undef PUTCHAR
81#undef PUTINC
82#undef TABLE_GET
83
84
85
86/* -------------------------- MACROS ----------------------------- */
87
88/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
89(always stored in big-endian order in 8-bit mode) by default. These are used,
90for example, to link from the start of a subpattern to its alternatives and its
91end. The use of 16 bits per offset limits the size of an 8-bit compiled regex
92to around 64K, which is big enough for almost everybody. However, I received a
93request for an even bigger limit. For this reason, and also to make the code
94easier to maintain, the storing and loading of offsets from the compiled code
95unit string is now handled by the macros that are defined here.
96
97The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
Janis Danisevskis8b979b22016-08-15 16:09:16 +010098values of 3 or 4 are also supported. */
Janis Danisevskis53e448c2016-03-31 13:35:25 +010099
100/* ------------------- 8-bit support ------------------ */
101
102#if PCRE2_CODE_UNIT_WIDTH == 8
103
104#if LINK_SIZE == 2
/* LINK_SIZE == 2 with 8-bit code units: an offset is stored in two bytes, most
significant byte first. Every use of the pointer argument is parenthesized and
each expansion is one parenthesized expression, so a pointer expression such as
"code + 1" can be passed and the macro can be used wherever an expression is
allowed. */

#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  ((unsigned int)(((a)[n] << 8) | (a)[(n)+1]))
#define MAX_PATTERN_SIZE (1 << 16)

111
112#elif LINK_SIZE == 3
/* LINK_SIZE == 3 with 8-bit code units: an offset is stored in three bytes,
most significant byte first. The pointer argument is parenthesized at every use
and each expansion is a single parenthesized expression. */

#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+2] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  ((unsigned int)(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]))
#define MAX_PATTERN_SIZE (1 << 24)

120
121#elif LINK_SIZE == 4
/* LINK_SIZE == 4 with 8-bit code units: an offset is stored in four bytes,
most significant byte first. In GET each byte is converted to unsigned int
before it is shifted: a code unit promoted to (signed) int and shifted left by
24 would overflow int whenever the byte is 0x80 or more. The pointer argument
is parenthesized at every use and each expansion is a single parenthesized
expression. */

#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 24), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+2] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+3] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  (((unsigned int)(a)[n] << 24) | ((unsigned int)(a)[(n)+1] << 16) | \
   ((unsigned int)(a)[(n)+2] << 8) | (unsigned int)(a)[(n)+3])
#define MAX_PATTERN_SIZE (1 << 30)   /* Keep it positive */

130
131#else
132#error LINK_SIZE must be 2, 3, or 4
133#endif
134
135
136/* ------------------- 16-bit support ------------------ */
137
138#elif PCRE2_CODE_UNIT_WIDTH == 16
139
140#if LINK_SIZE == 2
/* A configured LINK_SIZE of 2 means one 16-bit code unit per offset, so
LINK_SIZE is redefined as a count of code units. The pointer argument is
parenthesized so that pointer expressions can be passed. */

#undef LINK_SIZE
#define LINK_SIZE 1
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)(d))
#define GET(a,n) \
  ((a)[n])
#define MAX_PATTERN_SIZE (1 << 16)

148
149#elif LINK_SIZE == 3 || LINK_SIZE == 4
/* A configured LINK_SIZE of 3 or 4 means two 16-bit code units per offset,
high half first, so LINK_SIZE is redefined as a count of code units. In GET
each code unit is converted to unsigned int before it is shifted: a 16-bit unit
promoted to (signed) int and shifted left by 16 would overflow int whenever the
unit is 0x8000 or more. The pointer argument is parenthesized at every use and
each expansion is a single parenthesized expression. */

#undef LINK_SIZE
#define LINK_SIZE 2
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) & 65535))
#define GET(a,n) \
  (((unsigned int)(a)[n] << 16) | (unsigned int)(a)[(n)+1])
#define MAX_PATTERN_SIZE (1 << 30)  /* Keep it positive */

158
159#else
160#error LINK_SIZE must be 2, 3, or 4
161#endif
162
163
164/* ------------------- 32-bit support ------------------ */
165
166#elif PCRE2_CODE_UNIT_WIDTH == 32
/* With 32-bit code units an offset always occupies exactly one code unit,
whatever LINK_SIZE was configured. The pointer argument is parenthesized so
that pointer expressions can be passed. */

#undef LINK_SIZE
#define LINK_SIZE 1
#define PUT(a,n,d) \
  ((a)[n] = (d))
#define GET(a,n) \
  ((a)[n])
#define MAX_PATTERN_SIZE (1 << 30)  /* Keep it positive */

174
175#else
176#error Unsupported compiling mode
177#endif
178
179
180/* --------------- Other mode-specific macros ----------------- */
181
182/* PCRE uses some other (at least) 16-bit quantities that do not change when
183the size of offsets changes. These are used for repeat counts and for other
184things such as capturing parenthesis numbers in back references.
185
186Define the number of code units required to hold a 16-bit count/offset, and
187macros to load and store such a value. For reasons that I do not understand,
188the expression in the 8-bit GET2 macro is treated by gcc as a signed
189expression, even when a is declared as unsigned. It seems that any kind of
190arithmetic results in a signed value. Hence the cast. */
191
/* IMM2_SIZE is the number of code units that hold a 16-bit value. GET2 loads
such a value and PUT2 stores it. In 8-bit mode the value takes two bytes, high
byte first; in the wider modes it takes a single code unit. The pointer
argument is parenthesized at every use and each expansion is one parenthesized
expression, so pointer expressions can be passed and the macros can be used
wherever an expression is allowed. */

#if PCRE2_CODE_UNIT_WIDTH == 8
#define IMM2_SIZE 2
#define GET2(a,n) ((unsigned int)(((a)[n] << 8) | (a)[(n)+1]))
#define PUT2(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) & 255))

#else  /* Code units are 16 or 32 bits */
#define IMM2_SIZE 1
#define GET2(a,n) ((a)[n])
#define PUT2(a,n,d) ((a)[n] = (d))
#endif
202
203/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700204whether its argument, which is assumed to be one code unit, is less than 256.
205The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
206name must fit in one code unit; currently it is set to 255 or 65535. The
207TABLE_GET macro is used to access elements of tables containing exactly 256
Elliott Hughes2dbd7d22020-06-03 14:32:37 -0700208items. Its argument is a code unit. When code points can be greater than 255, a
209check is needed before accessing these tables. */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100210
#if PCRE2_CODE_UNIT_WIDTH == 8

/* A single 8-bit code unit is always below 256, so MAX_255 is constant and
TABLE_GET indexes the table without a range check. CHMAX_255 needs a real
comparison only when Unicode support is compiled in. */

#define MAX_MARK ((1u << 8) - 1)
#define MAX_255(c) TRUE
#define TABLE_GET(c, table, default) ((table)[c])
#ifndef SUPPORT_UNICODE
#define CHMAX_255(c) TRUE
#else
#define SUPPORT_WIDE_CHARS
#define CHMAX_255(c) ((c) <= 255u)
#endif  /* SUPPORT_UNICODE */

#else   /* Code units are 16 or 32 bits */

/* A code unit may be 256 or more, so both tests compare against 255 and
TABLE_GET yields the supplied default for any value outside the table. */

#define SUPPORT_WIDE_CHARS
#define MAX_MARK ((1u << 16) - 1)
#define MAX_255(c) ((c) <= 255u)
#define CHMAX_255(c) ((c) <= 255u)
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
#endif
229
230
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100231/* ----------------- Character-handling macros ----------------- */
232
233/* There is a proposed future special "UTF-21" mode, in which only the lowest
23421 bits of a 32-bit character are interpreted as UTF, with the remaining 11
235high-order bits available to the application for other uses. In preparation for
236the future implementation of this mode, there are macros that load a data item
237and, if in this special mode, mask it to 21 bits. These macros all have names
238starting with UCHAR21. In all other modes, including the normal 32-bit
239library, the macros all have the same simple definitions. When the new mode is
240implemented, it is expected that these definitions will be varied appropriately
241using #ifdef when compiling the library that supports the special mode. */
242
/* Plain loads: the first two read the code unit at eptr, the INC forms read it
and then advance eptr by one code unit. No masking is done here. */

#define UCHAR21(eptr)         (*(eptr))
#define UCHAR21TEST(eptr)     (*(eptr))
#define UCHAR21INC(eptr)      (*(eptr)++)
#define UCHAR21INCTEST(eptr)  (*(eptr)++)
247
248/* When UTF encoding is being used, a character is no longer just a single
249byte in 8-bit mode or a single short in 16-bit mode. The macros for character
250handling generate simple sequences when used in the basic mode, and more
251complicated ones for UTF characters. GETCHARLENTEST and other macros are not
252used when UTF is not supported. To make sure they can never even appear when
253UTF support is omitted, we don't even define them. */
254
255#ifndef SUPPORT_UNICODE
256
/* Without Unicode support a character is always exactly one code unit. The
macros shown only as comments are deliberately left undefined in this
configuration. The GETCHAR family expands to a complete assignment statement,
including its terminating semicolon. The pointer arguments are parenthesized so
that an expression such as "ptr + 1" is dereferenced as a whole. */

/* #define MAX_UTF_SINGLE_CU */
/* #define HAS_EXTRALEN(c) */
/* #define GET_EXTRALEN(c) */
/* #define NOT_FIRSTCU(c) */
#define GETCHAR(c, eptr) c = *(eptr);
#define GETCHARTEST(c, eptr) c = *(eptr);
#define GETCHARINC(c, eptr) c = *(eptr)++;
#define GETCHARINCTEST(c, eptr) c = *(eptr)++;
#define GETCHARLEN(c, eptr, len) c = *(eptr);
#define PUTCHAR(c, p) (*(p) = (c), 1)
/* #define GETCHARLENTEST(c, eptr, len) */
/* #define BACKCHAR(eptr) */
/* #define FORWARDCHAR(eptr) */
/* #define FORWARDCHARTEST(eptr,end) */
/* #define ACROSSCHAR(condition, eptr, action) */
272
273#else /* SUPPORT_UNICODE */
274
275/* ------------------- 8-bit support ------------------ */
276
277#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAYBE_UTF_MULTI          /* UTF chars may use multiple code units */

/* In the macros below every pointer argument is parenthesized, so an lvalue
expression such as "*pp" can be passed: BACKCHAR(*pp) steps the pointer that pp
refers to, not pp itself. The GETCHAR family still expands to statements that
end with their own semicolon. */

/* The largest UTF code point that can be encoded as a single code unit. */

#define MAX_UTF_SINGLE_CU 127

/* Tests whether the code point needs extra characters to decode. */

#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)

/* Returns the additional number of characters if HAS_EXTRALEN(c) is TRUE.
Otherwise the result is undefined. */

#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu])

/* Returns TRUE, if the given value is not the first code unit of a UTF
sequence. */

#define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u)

/* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */

#define GETCHAR(c, eptr) \
  c = *(eptr); \
  if (c >= 0xc0u) GETUTF8(c, eptr);

/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
pointer. */

#define GETCHARTEST(c, eptr) \
  c = *(eptr); \
  if (utf && c >= 0xc0u) GETUTF8(c, eptr);

/* Get the next UTF-8 character, advancing the pointer. This is called when we
know we are in UTF-8 mode. */

#define GETCHARINC(c, eptr) \
  c = *(eptr)++; \
  if (c >= 0xc0u) GETUTF8INC(c, eptr);

/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-8 mode. */

#define GETCHARINCTEST(c, eptr) \
  c = *(eptr)++; \
  if (utf && c >= 0xc0u) GETUTF8INC(c, eptr);

/* Get the next UTF-8 character, not advancing the pointer, incrementing length
if there are extra bytes. This is called when we know we are in UTF-8 mode. */

#define GETCHARLEN(c, eptr, len) \
  c = *(eptr); \
  if (c >= 0xc0u) GETUTF8LEN(c, eptr, len);

/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
pointer, incrementing length if there are extra bytes. This is called when we
do not know if we are in UTF-8 mode. */

#define GETCHARLENTEST(c, eptr, len) \
  c = *(eptr); \
  if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len);

/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-8 mode - we don't put a test within the macro
because almost all calls are already within a block of UTF-8 only code. */

#define BACKCHAR(eptr) while((*(eptr) & 0xc0u) == 0x80u) (eptr)--

/* Same as above, just in the other direction. */
#define FORWARDCHAR(eptr) while((*(eptr) & 0xc0u) == 0x80u) (eptr)++
#define FORWARDCHARTEST(eptr,end) \
  while((eptr) < (end) && (*(eptr) & 0xc0u) == 0x80u) (eptr)++

/* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) \
  while((condition) && ((*(eptr)) & 0xc0u) == 0x80u) action

/* Deposit a character into memory, returning the number of code units. */

#define PUTCHAR(c, p) ((utf && (c) > MAX_UTF_SINGLE_CU)? \
  PRIV(ord2utf)(c,p) : (*(p) = (c), 1))
359
360
361/* ------------------- 16-bit support ------------------ */
362
363#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MAYBE_UTF_MULTI          /* UTF chars may use multiple code units */

/* In the macros below every pointer argument is parenthesized, so a pointer
expression such as "ptr + 1" or an lvalue such as "*pp" can be passed. The
GETCHAR family still expands to statements that end with their own semicolon. */

/* The largest UTF code point that can be encoded as a single code unit. */

#define MAX_UTF_SINGLE_CU 65535

/* Tests whether the code point needs extra characters to decode. */

#define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u)

/* Returns the additional number of characters if HAS_EXTRALEN(c) is TRUE.
Otherwise the result is undefined. */

#define GET_EXTRALEN(c) 1

/* Returns TRUE, if the given value is not the first code unit of a UTF
sequence. */

#define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u)

/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer. */

#define GETUTF16(c, eptr) \
   { c = (((c & 0x3ffu) << 10) | ((eptr)[1] & 0x3ffu)) + 0x10000u; }

/* Get the next UTF-16 character, not advancing the pointer. This is called when
we know we are in UTF-16 mode. */

#define GETCHAR(c, eptr) \
  c = *(eptr); \
  if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);

/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
pointer. */

#define GETCHARTEST(c, eptr) \
  c = *(eptr); \
  if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);

/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
the pointer. */

#define GETUTF16INC(c, eptr) \
   { c = (((c & 0x3ffu) << 10) | (*(eptr)++ & 0x3ffu)) + 0x10000u; }

/* Get the next UTF-16 character, advancing the pointer. This is called when we
know we are in UTF-16 mode. */

#define GETCHARINC(c, eptr) \
  c = *(eptr)++; \
  if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);

/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-16 mode. */

#define GETCHARINCTEST(c, eptr) \
  c = *(eptr)++; \
  if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);

/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer, incrementing the length. */

#define GETUTF16LEN(c, eptr, len) \
   { c = (((c & 0x3ffu) << 10) | ((eptr)[1] & 0x3ffu)) + 0x10000u; (len)++; }

/* Get the next UTF-16 character, not advancing the pointer, incrementing
length if there is a low surrogate. This is called when we know we are in
UTF-16 mode. */

#define GETCHARLEN(c, eptr, len) \
  c = *(eptr); \
  if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);

/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the
pointer, incrementing length if there is a low surrogate. This is called when
we do not know if we are in UTF-16 mode. */

#define GETCHARLENTEST(c, eptr, len) \
  c = *(eptr); \
  if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);

/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-16 mode - we don't put a test within the
macro because almost all calls are already within a block of UTF-16 only
code. */

#define BACKCHAR(eptr) if ((*(eptr) & 0xfc00u) == 0xdc00u) (eptr)--

/* Same as above, just in the other direction. */
#define FORWARDCHAR(eptr) if ((*(eptr) & 0xfc00u) == 0xdc00u) (eptr)++
#define FORWARDCHARTEST(eptr,end) \
  if ((eptr) < (end) && (*(eptr) & 0xfc00u) == 0xdc00u) (eptr)++

/* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) \
  if ((condition) && ((*(eptr)) & 0xfc00u) == 0xdc00u) action

/* Deposit a character into memory, returning the number of code units. */

#define PUTCHAR(c, p) ((utf && (c) > MAX_UTF_SINGLE_CU)? \
  PRIV(ord2utf)(c,p) : (*(p) = (c), 1))
465
466
467/* ------------------- 32-bit support ------------------ */
468
469#else
470
471/* These are trivial for the 32-bit library, since all UTF-32 characters fit
472into one PCRE2_UCHAR unit. */
473
#define MAX_UTF_SINGLE_CU (0x10ffffu)
#define HAS_EXTRALEN(c) (0)
#define GET_EXTRALEN(c) (0)
#define NOT_FIRSTCU(c) (0)

/* Get the next UTF-32 character, not advancing the pointer. This is called when
we know we are in UTF-32 mode. */

#define GETCHAR(c, eptr) \
  c = *(eptr);

/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
pointer. */

#define GETCHARTEST(c, eptr) \
  c = *(eptr);

/* Get the next UTF-32 character, advancing the pointer. This is called when we
know we are in UTF-32 mode. */

#define GETCHARINC(c, eptr) \
  c = *((eptr)++);

/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-32 mode. */

#define GETCHARINCTEST(c, eptr) \
  c = *((eptr)++);

/* Get the next UTF-32 character, not advancing the pointer, not incrementing
length (since all UTF-32 is of length 1). This is called when we know we are in
UTF-32 mode. */

#define GETCHARLEN(c, eptr, len) \
  GETCHAR(c, eptr)

/* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
pointer, not incrementing the length (since all UTF-32 is of length 1).
This is called when we do not know if we are in UTF-32 mode. */

#define GETCHARLENTEST(c, eptr, len) \
  GETCHARTEST(c, eptr)

/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-32 mode - we don't put a test within the
macro because almost all calls are already within a block of UTF-32 only
code.

These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */

#define BACKCHAR(eptr) do { } while (0)

/* Same as above, just in the other direction. */

#define FORWARDCHAR(eptr) do { } while (0)
#define FORWARDCHARTEST(eptr,end) do { } while (0)

/* Same as above, but it allows a fully customizable form. Note that neither
the condition nor the action is evaluated here. */

#define ACROSSCHAR(condition, eptr, action) do { } while (0)

/* Deposit a character into memory, returning the number of code units. The
arguments are parenthesized so that pointer expressions can be passed. */

#define PUTCHAR(c, p) (*(p) = (c), 1)
538
539#endif /* UTF-32 character handling */
540#endif /* SUPPORT_UNICODE */
541
542
543/* Mode-dependent macros that have the same definition in all modes. */
544
/* CU2BYTES and BYTES2CU convert between a count of code units and a count of
bytes. PUTINC and PUT2INC store a value with PUT or PUT2 and then advance the
pointer by the size of what was stored. Each expands to one parenthesized
expression, so it can be used wherever an expression is allowed. */

#define CU2BYTES(x)     ((x)*((PCRE2_CODE_UNIT_WIDTH/8)))
#define BYTES2CU(x)     ((x)/((PCRE2_CODE_UNIT_WIDTH/8)))
#define PUTINC(a,n,d)   (PUT(a,n,d), (a) += LINK_SIZE)
#define PUT2INC(a,n,d)  (PUT2(a,n,d), (a) += IMM2_SIZE)
549
550
551/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
552
553/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
554code that uses them is simpler because it assumes this. */
555
556/* The real general context structure. At present it holds only data for custom
557memory control. */
558
559typedef struct pcre2_real_general_context {
560 pcre2_memctl memctl;
561} pcre2_real_general_context;
562
563/* The real compile context structure */
564
565typedef struct pcre2_real_compile_context {
566 pcre2_memctl memctl;
567 int (*stack_guard)(uint32_t, void *);
568 void *stack_guard_data;
569 const uint8_t *tables;
570 PCRE2_SIZE max_pattern_length;
571 uint16_t bsr_convention;
572 uint16_t newline_convention;
573 uint32_t parens_nest_limit;
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700574 uint32_t extra_options;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100575} pcre2_real_compile_context;
576
577/* The real match context structure. */
578
579typedef struct pcre2_real_match_context {
580 pcre2_memctl memctl;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100581#ifdef SUPPORT_JIT
582 pcre2_jit_callback jit_callback;
583 void *jit_callback_data;
584#endif
585 int (*callout)(pcre2_callout_block *, void *);
586 void *callout_data;
Elliott Hughes0c26e192019-08-07 12:24:46 -0700587 int (*substitute_callout)(pcre2_substitute_callout_block *, void *);
588 void *substitute_callout_data;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100589 PCRE2_SIZE offset_limit;
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700590 uint32_t heap_limit;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100591 uint32_t match_limit;
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700592 uint32_t depth_limit;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100593} pcre2_real_match_context;
594
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700595/* The real convert context structure. */
596
597typedef struct pcre2_real_convert_context {
598 pcre2_memctl memctl;
599 uint32_t glob_separator;
600 uint32_t glob_escape;
601} pcre2_real_convert_context;
602
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100603/* The real compiled code structure. The type for the blocksize field is
604defined specially because it is required in pcre2_serialize_decode() when
605copying the size from possibly unaligned memory into a variable of the same
606type. Use a macro rather than a typedef to avoid compiler warnings when this
607file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
608largest lookbehind that is supported. (OP_REVERSE in a pattern has a 16-bit
609argument in 8-bit and 16-bit modes, so we need no more than a 16-bit field
610here.) */
611
/* Both names are undefined first so that this file can be included more than
once without redefinition warnings. */

#undef LOOKBEHIND_MAX
#undef CODE_BLOCKSIZE_TYPE

#define CODE_BLOCKSIZE_TYPE size_t       /* Type of the blocksize field */
#define LOOKBEHIND_MAX UINT16_MAX        /* Largest supported lookbehind */
617
618typedef struct pcre2_real_code {
619 pcre2_memctl memctl; /* Memory control fields */
620 const uint8_t *tables; /* The character tables */
621 void *executable_jit; /* Pointer to JIT code */
622 uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
623 CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */
624 uint32_t magic_number; /* Paranoid and endianness check */
625 uint32_t compile_options; /* Options passed to pcre2_compile() */
626 uint32_t overall_options; /* Options after processing the pattern */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700627 uint32_t extra_options; /* Taken from compile_context */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100628 uint32_t flags; /* Various state flags */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700629 uint32_t limit_heap; /* Limit set in the pattern */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100630 uint32_t limit_match; /* Limit set in the pattern */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700631 uint32_t limit_depth; /* Limit set in the pattern */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100632 uint32_t first_codeunit; /* Starting code unit */
633 uint32_t last_codeunit; /* This codeunit must be seen */
634 uint16_t bsr_convention; /* What \R matches */
635 uint16_t newline_convention; /* What is a newline? */
636 uint16_t max_lookbehind; /* Longest lookbehind (characters) */
637 uint16_t minlength; /* Minimum length of match */
638 uint16_t top_bracket; /* Highest numbered group */
639 uint16_t top_backref; /* Highest numbered back reference */
640 uint16_t name_entry_size; /* Size (code units) of table entries */
641 uint16_t name_count; /* Number of name entries in the table */
642} pcre2_real_code;
643
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700644/* The real match data structure. Define ovector as large as it can ever
645actually be so that array bound checkers don't grumble. Memory for this
646structure is obtained by calling pcre2_match_data_create(), which sets the size
647as the offset of ovector plus a pair of elements for each capturable string, so
648the size varies from call to call. As the maximum number of capturing
649subpatterns is 65535 we must allow for 65536 strings to include the overall
650match. (See also the heapframe structure below.) */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100651
652typedef struct pcre2_real_match_data {
653 pcre2_memctl memctl;
654 const pcre2_real_code *code; /* The pattern used for the match */
655 PCRE2_SPTR subject; /* The subject that was matched */
656 PCRE2_SPTR mark; /* Pointer to last mark */
657 PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
658 PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
659 PCRE2_SIZE startchar; /* Offset to starting code unit */
Elliott Hughes0c26e192019-08-07 12:24:46 -0700660 uint8_t matchedby; /* Type of match (normal, JIT, DFA) */
661 uint8_t flags; /* Various flags */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100662 uint16_t oveccount; /* Number of pairs */
663 int rc; /* The return code from the match */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700664 PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100665} pcre2_real_match_data;
666
667
668/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
669
670/* These structures are not needed for pcre2test. */
671
672#ifndef PCRE2_PCRE2TEST
673
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700674/* Structures for checking for mutual recursion when scanning compiled or
675parsed code. */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100676
677typedef struct recurse_check {
678 struct recurse_check *prev;
679 PCRE2_SPTR group;
680} recurse_check;
681
/* One link in a chain of parsed_recurse_check records: a pointer to the
previous record and a pointer to a 32-bit item for a group. */

typedef struct parsed_recurse_check {
  struct parsed_recurse_check *prev;   /* Previous record in the chain */
  uint32_t *groupptr;                  /* The group this record refers to */
} parsed_recurse_check;
686
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100687/* Structure for building a cache when filling in recursion offsets. */
688
689typedef struct recurse_cache {
690 PCRE2_SPTR group;
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700691 int groupnumber;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100692} recurse_cache;
693
694/* Structure for maintaining a chain of pointers to the currently incomplete
695branches, for testing for left recursion while compiling. */
696
697typedef struct branch_chain {
698 struct branch_chain *outer;
699 PCRE2_UCHAR *current_branch;
700} branch_chain;
701
702/* Structure for building a list of named groups during the first pass of
703compiling. */
704
705typedef struct named_group {
706 PCRE2_SPTR name; /* Points to the name in the pattern */
707 uint32_t number; /* Group number */
708 uint16_t length; /* Length of the name */
709 uint16_t isdup; /* TRUE if a duplicate */
710} named_group;
711
712/* Structure for passing "static" information around between the functions
713doing the compiling, so that they are thread-safe. */
714
715typedef struct compile_block {
716 pcre2_real_compile_context *cx; /* Points to the compile context */
717 const uint8_t *lcc; /* Points to lower casing table */
718 const uint8_t *fcc; /* Points to case-flipping table */
719 const uint8_t *cbits; /* Points to character type table */
720 const uint8_t *ctypes; /* Points to table of type maps */
721 PCRE2_SPTR start_workspace; /* The start of working space */
722 PCRE2_SPTR start_code; /* The start of the compiled code */
723 PCRE2_SPTR start_pattern; /* The start of the pattern */
724 PCRE2_SPTR end_pattern; /* The end of the pattern */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100725 PCRE2_UCHAR *name_table; /* The name/number table */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700726 PCRE2_SIZE workspace_size; /* Size of workspace */
727 PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
728 PCRE2_SIZE erroroffset; /* Offset of error in pattern */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100729 uint16_t names_found; /* Number of entries so far */
730 uint16_t name_entry_size; /* Size of each entry */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700731 uint16_t parens_depth; /* Depth of nested parentheses */
732 uint16_t assert_depth; /* Depth of nested assertions */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100733 open_capitem *open_caps; /* Chain of open capture items */
734 named_group *named_groups; /* Points to vector in pre-compile */
735 uint32_t named_group_list_size; /* Number of entries in the list */
736 uint32_t external_options; /* External (initial) options */
737 uint32_t external_flags; /* External flag bits to be set */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700738 uint32_t bracount; /* Count of capturing parentheses */
739 uint32_t lastcapture; /* Last capture encountered */
740 uint32_t *parsed_pattern; /* Parsed pattern buffer */
741 uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100742 uint32_t *groupinfo; /* Group info vector */
743 uint32_t top_backref; /* Maximum back reference */
744 uint32_t backref_map; /* Bitmap of low back refs */
745 uint32_t nltype; /* Newline type */
746 uint32_t nllen; /* Newline string length */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700747 uint32_t class_range_start; /* Overall class range start */
748 uint32_t class_range_end; /* Overall class range end */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100749 PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
Elliott Hughes4e19c8e2022-04-15 15:11:02 -0700750 uint32_t req_varyopt; /* "After variable item" flag for reqbyte */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100751 int max_lookbehind; /* Maximum lookbehind (characters) */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100752 BOOL had_accept; /* (*ACCEPT) encountered */
753 BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
754 BOOL had_recurse; /* Had a recursion or subroutine call */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100755 BOOL dupnames; /* Duplicate names exist */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100756} compile_block;
757
758/* Structure for keeping the properties of the in-memory stack used
759by the JIT matcher. */
760
761typedef struct pcre2_real_jit_stack {
762 pcre2_memctl memctl;
763 void* stack;
764} pcre2_real_jit_stack;
765
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100766/* Structure for items in a linked list that represents an explicit recursive
Elliott Hughes4e19c8e2022-04-15 15:11:02 -0700767call within the pattern when running pcre2_dfa_match(). */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100768
769typedef struct dfa_recursion_info {
770 struct dfa_recursion_info *prevrec;
771 PCRE2_SPTR subject_position;
772 uint32_t group_num;
773} dfa_recursion_info;
774
/* Structure for "stack" frames that are used for remembering backtracking
positions during matching. As these are used in a vector, with the ovector item
being extended, the size of the structure must be a multiple of
sizeof(PCRE2_SIZE). The only way to check this at compile time is to force an
error by generating an array with a negative size. By putting this in a typedef
(which is never used), we don't generate any code when all is well. */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100781
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700782typedef struct heapframe {
783
784 /* The first set of fields are variables that have to be preserved over calls
785 to RRMATCH(), but which do not need to be copied to new frames. */
786
787 PCRE2_SPTR ecode; /* The current position in the pattern */
788 PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */
789 PCRE2_SIZE length; /* Used for character, string, or code lengths */
790 PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
791 PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
792 uint32_t rdepth; /* "Recursion" depth */
793 uint32_t group_frame_type; /* Type information for group frames */
794 uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */
795 uint8_t return_id; /* Where to go on in internal "return" */
796 uint8_t op; /* Processing opcode */
797
Elliott Hughes653c2102019-01-09 15:41:36 -0800798 /* At this point, the structure is 16-bit aligned. On most architectures
799 the alignment requirement for a pointer will ensure that the eptr field below
800 is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
801 that is 16-bit aligned. We must therefore ensure that what comes between here
802 and eptr is an odd multiple of 16 bits so as to get back into 32-bit
803 alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
804 fudges in the other cases. In the 32-bit case the padding comes first so that
805 the occu field itself is 32-bit aligned. Without the padding, this structure
806 is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */
807
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700808#if PCRE2_CODE_UNIT_WIDTH == 8
809 PCRE2_UCHAR occu[6]; /* Used for other case code units */
810#elif PCRE2_CODE_UNIT_WIDTH == 16
811 PCRE2_UCHAR occu[2]; /* Used for other case code units */
Elliott Hughes653c2102019-01-09 15:41:36 -0800812 uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700813#else
Elliott Hughes653c2102019-01-09 15:41:36 -0800814 uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700815 PCRE2_UCHAR occu[1]; /* Used for other case code units */
816#endif
817
818 /* The rest have to be copied from the previous frame whenever a new frame
819 becomes current. The final field is specified as a large vector so that
820 runtime array bound checks don't catch references to it. However, for any
821 specific call to pcre2_match() the memory allocated for each frame structure
822 allows for exactly the right size ovector for the number of capturing
823 parentheses. (See also the comment for pcre2_real_match_data above.) */
824
825 PCRE2_SPTR eptr; /* MUST BE FIRST */
826 PCRE2_SPTR start_match; /* Can be adjusted by \K */
827 PCRE2_SPTR mark; /* Most recent mark on the success path */
828 uint32_t current_recurse; /* Current (deepest) recursion number */
829 uint32_t capture_last; /* Most recent capture */
830 PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
831 PCRE2_SIZE offset_top; /* Offset after highest capture */
832 PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
833} heapframe;
834
/* This typedef is a compile-time check that the size of the heapframe
structure is a multiple of sizeof(PCRE2_SIZE). See various comments above. */
837
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700838typedef char check_heapframe_size[
839 ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100840
Elliott Hughes4e19c8e2022-04-15 15:11:02 -0700841/* Structure for computing the alignment of heapframe. */
842
843typedef struct heapframe_align {
844 char unalign; /* Completely unalign the current offset */
845 heapframe frame; /* Offset is its alignment */
846} heapframe_align;
847
848/* This define is the minimum alignment required for a heapframe, in bytes. */
849
850#define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame)
851
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100852/* Structure for passing "static" information around between the functions
853doing traditional NFA matching (pcre2_match() and friends). */
854
855typedef struct match_block {
856 pcre2_memctl memctl; /* For general use */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700857 PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */
858 heapframe *match_frames; /* Points to vector of frames */
859 heapframe *match_frames_top; /* Points after the end of the vector */
860 heapframe *stack_frames; /* The original vector on the stack */
861 PCRE2_SIZE heap_limit; /* As it says */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100862 uint32_t match_limit; /* As it says */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700863 uint32_t match_limit_depth; /* As it says */
864 uint32_t match_call_count; /* Number of times a new frame is created */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100865 BOOL hitend; /* Hit the end of the subject at some point */
866 BOOL hasthen; /* Pattern contains (*THEN) */
Elliott Hughes2dbd7d22020-06-03 14:32:37 -0700867 BOOL allowemptypartial; /* Allow empty hard partial */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100868 const uint8_t *lcc; /* Points to lower casing table */
869 const uint8_t *fcc; /* Points to case-flipping table */
870 const uint8_t *ctypes; /* Points to table of type maps */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100871 PCRE2_SIZE start_offset; /* The start offset value */
872 PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
873 uint16_t partial; /* PARTIAL options */
874 uint16_t bsr_convention; /* \R interpretation */
875 uint16_t name_count; /* Number of names in name table */
876 uint16_t name_entry_size; /* Size of entry in names table */
877 PCRE2_SPTR name_table; /* Table of group names */
878 PCRE2_SPTR start_code; /* For use when recursing */
879 PCRE2_SPTR start_subject; /* Start of the subject string */
Elliott Hughes2dbd7d22020-06-03 14:32:37 -0700880 PCRE2_SPTR check_subject; /* Where UTF-checked from */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100881 PCRE2_SPTR end_subject; /* End of the subject string */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100882 PCRE2_SPTR end_match_ptr; /* Subject position at end match */
883 PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
884 PCRE2_SPTR last_used_ptr; /* Latest consulted character */
885 PCRE2_SPTR mark; /* Mark pointer to pass back on success */
886 PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700887 PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
888 PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */
889 uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100890 uint32_t moptions; /* Match options */
891 uint32_t poptions; /* Pattern options */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100892 uint32_t skip_arg_count; /* For counting SKIP_ARGs */
893 uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100894 uint32_t nltype; /* Newline type */
895 uint32_t nllen; /* Newline string length */
896 PCRE2_UCHAR nl[4]; /* Newline string when fixed */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700897 pcre2_callout_block *cb; /* Points to a callout block */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100898 void *callout_data; /* To pass back to callouts */
899 int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100900} match_block;
901
902/* A similar structure is used for the same purpose by the DFA matching
903functions. */
904
905typedef struct dfa_match_block {
906 pcre2_memctl memctl; /* For general use */
907 PCRE2_SPTR start_code; /* Start of the compiled pattern */
908 PCRE2_SPTR start_subject ; /* Start of the subject string */
909 PCRE2_SPTR end_subject; /* End of subject string */
910 PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
911 PCRE2_SPTR last_used_ptr; /* Latest consulted character */
912 const uint8_t *tables; /* Character tables */
913 PCRE2_SIZE start_offset; /* The start offset value */
Elliott Hughes653c2102019-01-09 15:41:36 -0800914 PCRE2_SIZE heap_limit; /* As it says */
915 PCRE2_SIZE heap_used; /* As it says */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700916 uint32_t match_limit; /* As it says */
917 uint32_t match_limit_depth; /* As it says */
918 uint32_t match_call_count; /* Number of calls of internal function */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100919 uint32_t moptions; /* Match options */
920 uint32_t poptions; /* Pattern options */
921 uint32_t nltype; /* Newline type */
922 uint32_t nllen; /* Newline string length */
Elliott Hughes2dbd7d22020-06-03 14:32:37 -0700923 BOOL allowemptypartial; /* Allow empty hard partial */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100924 PCRE2_UCHAR nl[4]; /* Newline string when fixed */
925 uint16_t bsr_convention; /* \R interpretation */
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700926 pcre2_callout_block *cb; /* Points to a callout block */
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100927 void *callout_data; /* To pass back to callouts */
928 int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
929 dfa_recursion_info *recursive; /* Linked list of recursion data */
930} dfa_match_block;
931
932#endif /* PCRE2_PCRE2TEST */
933
934/* End of pcre2_intmodedep.h */