Blame - src/pcre2_intmodedep.h - platform/external/pcre

blob: f8a3d25de673c89f44ff4d145061de5e3b67a025 [file] [log] [blame]

Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	1	/*************************************************
				2	* Perl-Compatible Regular Expressions *
				3	*************************************************/
				4
				5	/* PCRE is a library of functions to support regular expressions whose syntax
				6	and semantics are as close as possible to those of the Perl 5 language.
				7
				8	Written by Philip Hazel
				9	Original API code Copyright (c) 1997-2012 University of Cambridge
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	10	New API code Copyright (c) 2016-2018 University of Cambridge
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	11
				12	-----------------------------------------------------------------------------
				13	Redistribution and use in source and binary forms, with or without
				14	modification, are permitted provided that the following conditions are met:
				15
				16	* Redistributions of source code must retain the above copyright notice,
				17	this list of conditions and the following disclaimer.
				18
				19	* Redistributions in binary form must reproduce the above copyright
				20	notice, this list of conditions and the following disclaimer in the
				21	documentation and/or other materials provided with the distribution.
				22
				23	* Neither the name of the University of Cambridge nor the names of its
				24	contributors may be used to endorse or promote products derived from
				25	this software without specific prior written permission.
				26
				27	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				28	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				29	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				30	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
				31	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				32	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				33	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				34	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
				35	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				36	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
				37	POSSIBILITY OF SUCH DAMAGE.
				38	-----------------------------------------------------------------------------
				39	*/
				40
				41
				42	/* This module contains mode-dependent macro and structure definitions. The
				43	file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
				44	These mode-dependent items are kept in a separate file so that they can also be
				45	#included multiple times for different code unit widths by pcre2test in order
				46	to have access to the hidden structures at all supported widths.
				47
				48	Some of the mode-dependent macros are required at different widths for
				49	different parts of the pcre2test code (in particular, the included
				50	pcre2_printint.c file). We undefine them here so that they can be re-defined for
				51	multiple inclusions. Not all of these are used in pcre2test, but it's easier
				52	just to undefine them all. */
				53
				54	#undef ACROSSCHAR
				55	#undef BACKCHAR
				56	#undef BYTES2CU
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	57	#undef CHMAX_255
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	58	#undef CU2BYTES
				59	#undef FORWARDCHAR
				60	#undef FORWARDCHARTEST
				61	#undef GET
				62	#undef GET2
				63	#undef GETCHAR
				64	#undef GETCHARINC
				65	#undef GETCHARINCTEST
				66	#undef GETCHARLEN
				67	#undef GETCHARLENTEST
				68	#undef GETCHARTEST
				69	#undef GET_EXTRALEN
				70	#undef HAS_EXTRALEN
				71	#undef IMM2_SIZE
				72	#undef MAX_255
				73	#undef MAX_MARK
				74	#undef MAX_PATTERN_SIZE
				75	#undef MAX_UTF_SINGLE_CU
				76	#undef NOT_FIRSTCU
				77	#undef PUT
				78	#undef PUT2
				79	#undef PUT2INC
				80	#undef PUTCHAR
				81	#undef PUTINC
				82	#undef TABLE_GET
				83
				84
				85
				86	/* -------------------------- MACROS ----------------------------- */
				87
				88	/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
				89	(always stored in big-endian order in 8-bit mode) by default. These are used,
				90	for example, to link from the start of a subpattern to its alternatives and its
				91	end. The use of 16 bits per offset limits the size of an 8-bit compiled regex
				92	to around 64K, which is big enough for almost everybody. However, I received a
				93	request for an even bigger limit. For this reason, and also to make the code
				94	easier to maintain, the storing and loading of offsets from the compiled code
				95	unit string is now handled by the macros that are defined here.
				96
				97	The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	98	values of 3 or 4 are also supported. */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	99
				100	/* ------------------- 8-bit support ------------------ */
				101
				102	#if PCRE2_CODE_UNIT_WIDTH == 8
				103
				104	#if LINK_SIZE == 2
				105	#define PUT(a,n,d) \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	106	(a[n] = (PCRE2_UCHAR)((d) >> 8)), \
				107	(a[(n)+1] = (PCRE2_UCHAR)((d) & 255))
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	108	#define GET(a,n) \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	109	(unsigned int)(((a)[n] << 8) \| (a)[(n)+1])
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	110	#define MAX_PATTERN_SIZE (1 << 16)
				111
				112	#elif LINK_SIZE == 3
				113	#define PUT(a,n,d) \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	114	(a[n] = (PCRE2_UCHAR)((d) >> 16)), \
				115	(a[(n)+1] = (PCRE2_UCHAR)((d) >> 8)), \
				116	(a[(n)+2] = (PCRE2_UCHAR)((d) & 255))
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	117	#define GET(a,n) \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	118	(unsigned int)(((a)[n] << 16) \| ((a)[(n)+1] << 8) \| (a)[(n)+2])
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	119	#define MAX_PATTERN_SIZE (1 << 24)
				120
				121	#elif LINK_SIZE == 4
				122	#define PUT(a,n,d) \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	123	(a[n] = (PCRE2_UCHAR)((d) >> 24)), \
				124	(a[(n)+1] = (PCRE2_UCHAR)((d) >> 16)), \
				125	(a[(n)+2] = (PCRE2_UCHAR)((d) >> 8)), \
				126	(a[(n)+3] = (PCRE2_UCHAR)((d) & 255))
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	127	#define GET(a,n) \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	128	(unsigned int)(((a)[n] << 24) \| ((a)[(n)+1] << 16) \| ((a)[(n)+2] << 8) \| (a)[(n)+3])
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	129	#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
				130
				131	#else
				132	#error LINK_SIZE must be 2, 3, or 4
				133	#endif
				134
				135
				136	/* ------------------- 16-bit support ------------------ */
				137
				138	#elif PCRE2_CODE_UNIT_WIDTH == 16
				139
				140	#if LINK_SIZE == 2
				141	#undef LINK_SIZE
				142	#define LINK_SIZE 1
				143	#define PUT(a,n,d) \
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	144	(a[n] = (PCRE2_UCHAR)(d))
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	145	#define GET(a,n) \
				146	(a[n])
				147	#define MAX_PATTERN_SIZE (1 << 16)
				148
				149	#elif LINK_SIZE == 3 \|\| LINK_SIZE == 4
				150	#undef LINK_SIZE
				151	#define LINK_SIZE 2
				152	#define PUT(a,n,d) \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	153	(a[n] = (PCRE2_UCHAR)((d) >> 16)), \
				154	(a[(n)+1] = (PCRE2_UCHAR)((d) & 65535))
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	155	#define GET(a,n) \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	156	(unsigned int)(((a)[n] << 16) \| (a)[(n)+1])
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	157	#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
				158
				159	#else
				160	#error LINK_SIZE must be 2, 3, or 4
				161	#endif
				162
				163
				164	/* ------------------- 32-bit support ------------------ */
				165
				166	#elif PCRE2_CODE_UNIT_WIDTH == 32
				167	#undef LINK_SIZE
				168	#define LINK_SIZE 1
				169	#define PUT(a,n,d) \
				170	(a[n] = (d))
				171	#define GET(a,n) \
				172	(a[n])
				173	#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
				174
				175	#else
				176	#error Unsupported compiling mode
				177	#endif
				178
				179
				180	/* --------------- Other mode-specific macros ----------------- */
				181
				182	/* PCRE uses some other (at least) 16-bit quantities that do not change when
				183	the size of offsets changes. These are used for repeat counts and for other
				184	things such as capturing parenthesis numbers in back references.
				185
				186	Define the number of code units required to hold a 16-bit count/offset, and
				187	macros to load and store such a value. For reasons that I do not understand,
				188	the expression in the 8-bit GET2 macro is treated by gcc as a signed
				189	expression, even when a is declared as unsigned. It seems that any kind of
				190	arithmetic results in a signed value. Hence the cast. */
				191
				192	#if PCRE2_CODE_UNIT_WIDTH == 8
				193	#define IMM2_SIZE 2
				194	#define GET2(a,n) (unsigned int)(((a)[n] << 8) \| (a)[(n)+1])
				195	#define PUT2(a,n,d) a[n] = (d) >> 8, a[(n)+1] = (d) & 255
				196
				197	#else /* Code units are 16 or 32 bits */
				198	#define IMM2_SIZE 1
				199	#define GET2(a,n) a[n]
				200	#define PUT2(a,n,d) a[n] = d
				201	#endif
				202
				203	/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	204	whether its argument, which is assumed to be one code unit, is less than 256.
				205	The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
				206	name must fit in one code unit; currently it is set to 255 or 65535. The
				207	TABLE_GET macro is used to access elements of tables containing exactly 256
Elliott Hughes	2dbd7d2	2020-06-03 14:32:37 -0700	[diff] [blame]	208	items. Its argument is a code unit. When code points can be greater than 255, a
				209	check is needed before accessing these tables. */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	210
				211	#if PCRE2_CODE_UNIT_WIDTH == 8
				212	#define MAX_255(c) TRUE
				213	#define MAX_MARK ((1u << 8) - 1)
Elliott Hughes	2dbd7d2	2020-06-03 14:32:37 -0700	[diff] [blame]	214	#define TABLE_GET(c, table, default) ((table)[c])
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	215	#ifdef SUPPORT_UNICODE
				216	#define SUPPORT_WIDE_CHARS
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	217	#define CHMAX_255(c) ((c) <= 255u)
				218	#else
				219	#define CHMAX_255(c) TRUE
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	220	#endif /* SUPPORT_UNICODE */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	221
				222	#else /* Code units are 16 or 32 bits */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	223	#define CHMAX_255(c) ((c) <= 255u)
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	224	#define MAX_255(c) ((c) <= 255u)
				225	#define MAX_MARK ((1u << 16) - 1)
				226	#define SUPPORT_WIDE_CHARS
				227	#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
				228	#endif
				229
				230
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	231	/* ----------------- Character-handling macros ----------------- */
				232
				233	/* There is a proposed future special "UTF-21" mode, in which only the lowest
				234	21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
				235	high-order bits available to the application for other uses. In preparation for
				236	the future implementation of this mode, there are macros that load a data item
				237	and, if in this special mode, mask it to 21 bits. These macros all have names
				238	starting with UCHAR21. In all other modes, including the normal 32-bit
				239	library, the macros all have the same simple definitions. When the new mode is
				240	implemented, it is expected that these definitions will be varied appropriately
				241	using #ifdef when compiling the library that supports the special mode. */
				242
				243	#define UCHAR21(eptr) (*(eptr))
				244	#define UCHAR21TEST(eptr) (*(eptr))
				245	#define UCHAR21INC(eptr) (*(eptr)++)
				246	#define UCHAR21INCTEST(eptr) (*(eptr)++)
				247
				248	/* When UTF encoding is being used, a character is no longer just a single
				249	byte in 8-bit mode or a single short in 16-bit mode. The macros for character
				250	handling generate simple sequences when used in the basic mode, and more
				251	complicated ones for UTF characters. GETCHARLENTEST and other macros are not
				252	used when UTF is not supported. To make sure they can never even appear when
				253	UTF support is omitted, we don't even define them. */
				254
				255	#ifndef SUPPORT_UNICODE
				256
				257	/* #define MAX_UTF_SINGLE_CU */
				258	/* #define HAS_EXTRALEN(c) */
				259	/* #define GET_EXTRALEN(c) */
				260	/* #define NOT_FIRSTCU(c) */
				261	#define GETCHAR(c, eptr) c = *eptr;
				262	#define GETCHARTEST(c, eptr) c = *eptr;
				263	#define GETCHARINC(c, eptr) c = *eptr++;
				264	#define GETCHARINCTEST(c, eptr) c = *eptr++;
				265	#define GETCHARLEN(c, eptr, len) c = *eptr;
				266	#define PUTCHAR(c, p) (*p = c, 1)
				267	/* #define GETCHARLENTEST(c, eptr, len) */
				268	/* #define BACKCHAR(eptr) */
				269	/* #define FORWARDCHAR(eptr) */
				270	/* #define FORWARCCHARTEST(eptr,end) */
				271	/* #define ACROSSCHAR(condition, eptr, action) */
				272
				273	#else /* SUPPORT_UNICODE */
				274
				275	/* ------------------- 8-bit support ------------------ */
				276
				277	#if PCRE2_CODE_UNIT_WIDTH == 8
				278	#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
				279
				280	/* The largest UTF code point that can be encoded as a single code unit. */
				281
				282	#define MAX_UTF_SINGLE_CU 127
				283
				284	/* Tests whether the code point needs extra characters to decode. */
				285
				286	#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)
				287
				288	/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
				289	Otherwise it has an undefined behaviour. */
				290
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	291	#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu])
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	292
				293	/* Returns TRUE, if the given value is not the first code unit of a UTF
				294	sequence. */
				295
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	296	#define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u)
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	297
				298	/* Get the next UTF-8 character, not advancing the pointer. This is called when
				299	we know we are in UTF-8 mode. */
				300
				301	#define GETCHAR(c, eptr) \
				302	c = *eptr; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	303	if (c >= 0xc0u) GETUTF8(c, eptr);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	304
				305	/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
				306	pointer. */
				307
				308	#define GETCHARTEST(c, eptr) \
				309	c = *eptr; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	310	if (utf && c >= 0xc0u) GETUTF8(c, eptr);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	311
				312	/* Get the next UTF-8 character, advancing the pointer. This is called when we
				313	know we are in UTF-8 mode. */
				314
				315	#define GETCHARINC(c, eptr) \
				316	c = *eptr++; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	317	if (c >= 0xc0u) GETUTF8INC(c, eptr);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	318
				319	/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
				320	This is called when we don't know if we are in UTF-8 mode. */
				321
				322	#define GETCHARINCTEST(c, eptr) \
				323	c = *eptr++; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	324	if (utf && c >= 0xc0u) GETUTF8INC(c, eptr);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	325
				326	/* Get the next UTF-8 character, not advancing the pointer, incrementing length
				327	if there are extra bytes. This is called when we know we are in UTF-8 mode. */
				328
				329	#define GETCHARLEN(c, eptr, len) \
				330	c = *eptr; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	331	if (c >= 0xc0u) GETUTF8LEN(c, eptr, len);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	332
				333	/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
				334	pointer, incrementing length if there are extra bytes. This is called when we
				335	do not know if we are in UTF-8 mode. */
				336
				337	#define GETCHARLENTEST(c, eptr, len) \
				338	c = *eptr; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	339	if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	340
				341	/* If the pointer is not at the start of a character, move it back until
				342	it is. This is called only in UTF-8 mode - we don't put a test within the macro
				343	because almost all calls are already within a block of UTF-8 only code. */
				344
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	345	#define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr--
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	346
				347	/* Same as above, just in the other direction. */
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	348	#define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++
				349	#define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	350
				351	/* Same as above, but it allows a fully customizable form. */
				352	#define ACROSSCHAR(condition, eptr, action) \
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	353	while((condition) && ((*eptr) & 0xc0u) == 0x80u) action
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	354
				355	/* Deposit a character into memory, returning the number of code units. */
				356
				357	#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
				358	PRIV(ord2utf)(c,p) : (*p = c, 1))
				359
				360
				361	/* ------------------- 16-bit support ------------------ */
				362
				363	#elif PCRE2_CODE_UNIT_WIDTH == 16
				364	#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
				365
				366	/* The largest UTF code point that can be encoded as a single code unit. */
				367
				368	#define MAX_UTF_SINGLE_CU 65535
				369
				370	/* Tests whether the code point needs extra characters to decode. */
				371
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	372	#define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u)
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	373
				374	/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
				375	Otherwise it has an undefined behaviour. */
				376
				377	#define GET_EXTRALEN(c) 1
				378
				379	/* Returns TRUE, if the given value is not the first code unit of a UTF
				380	sequence. */
				381
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	382	#define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u)
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	383
				384	/* Base macro to pick up the low surrogate of a UTF-16 character, not
				385	advancing the pointer. */
				386
				387	#define GETUTF16(c, eptr) \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	388	{ c = (((c & 0x3ffu) << 10) \| (eptr[1] & 0x3ffu)) + 0x10000u; }
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	389
				390	/* Get the next UTF-16 character, not advancing the pointer. This is called when
				391	we know we are in UTF-16 mode. */
				392
				393	#define GETCHAR(c, eptr) \
				394	c = *eptr; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	395	if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	396
				397	/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
				398	pointer. */
				399
				400	#define GETCHARTEST(c, eptr) \
				401	c = *eptr; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	402	if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	403
				404	/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
				405	the pointer. */
				406
				407	#define GETUTF16INC(c, eptr) \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	408	{ c = (((c & 0x3ffu) << 10) \| (*eptr++ & 0x3ffu)) + 0x10000u; }
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	409
				410	/* Get the next UTF-16 character, advancing the pointer. This is called when we
				411	know we are in UTF-16 mode. */
				412
				413	#define GETCHARINC(c, eptr) \
				414	c = *eptr++; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	415	if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	416
				417	/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
				418	This is called when we don't know if we are in UTF-16 mode. */
				419
				420	#define GETCHARINCTEST(c, eptr) \
				421	c = *eptr++; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	422	if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	423
				424	/* Base macro to pick up the low surrogate of a UTF-16 character, not
				425	advancing the pointer, incrementing the length. */
				426
				427	#define GETUTF16LEN(c, eptr, len) \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	428	{ c = (((c & 0x3ffu) << 10) \| (eptr[1] & 0x3ffu)) + 0x10000u; len++; }
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	429
				430	/* Get the next UTF-16 character, not advancing the pointer, incrementing
				431	length if there is a low surrogate. This is called when we know we are in
				432	UTF-16 mode. */
				433
				434	#define GETCHARLEN(c, eptr, len) \
				435	c = *eptr; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	436	if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	437
				438	/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the
				439	pointer, incrementing length if there is a low surrogate. This is called when
				440	we do not know if we are in UTF-16 mode. */
				441
				442	#define GETCHARLENTEST(c, eptr, len) \
				443	c = *eptr; \
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	444	if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	445
				446	/* If the pointer is not at the start of a character, move it back until
				447	it is. This is called only in UTF-16 mode - we don't put a test within the
				448	macro because almost all calls are already within a block of UTF-16 only
				449	code. */
				450
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	451	#define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr--
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	452
				453	/* Same as above, just in the other direction. */
Janis Danisevskis	8b979b2	2016-08-15 16:09:16 +0100	[diff] [blame]	454	#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++
				455	#define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	456
				457	/* Same as above, but it allows a fully customizable form. */
				458	#define ACROSSCHAR(condition, eptr, action) \
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	459	if ((condition) && ((*eptr) & 0xfc00u) == 0xdc00u) action
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	460
				461	/* Deposit a character into memory, returning the number of code units. */
				462
				463	#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
				464	PRIV(ord2utf)(c,p) : (*p = c, 1))
				465
				466
				467	/* ------------------- 32-bit support ------------------ */
				468
				469	#else
				470
				471	/* These are trivial for the 32-bit library, since all UTF-32 characters fit
				472	into one PCRE2_UCHAR unit. */
				473
				474	#define MAX_UTF_SINGLE_CU (0x10ffffu)
				475	#define HAS_EXTRALEN(c) (0)
				476	#define GET_EXTRALEN(c) (0)
				477	#define NOT_FIRSTCU(c) (0)
				478
				479	/* Get the next UTF-32 character, not advancing the pointer. This is called when
				480	we know we are in UTF-32 mode. */
				481
				482	#define GETCHAR(c, eptr) \
				483	c = *(eptr);
				484
				485	/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
				486	pointer. */
				487
				488	#define GETCHARTEST(c, eptr) \
				489	c = *(eptr);
				490
				491	/* Get the next UTF-32 character, advancing the pointer. This is called when we
				492	know we are in UTF-32 mode. */
				493
				494	#define GETCHARINC(c, eptr) \
				495	c = *((eptr)++);
				496
				497	/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
				498	This is called when we don't know if we are in UTF-32 mode. */
				499
				500	#define GETCHARINCTEST(c, eptr) \
				501	c = *((eptr)++);
				502
				503	/* Get the next UTF-32 character, not advancing the pointer, not incrementing
				504	length (since all UTF-32 is of length 1). This is called when we know we are in
				505	UTF-32 mode. */
				506
				507	#define GETCHARLEN(c, eptr, len) \
				508	GETCHAR(c, eptr)
				509
				510	/* Get the next UTF-32character, testing for UTF-32 mode, not advancing the
				511	pointer, not incrementing the length (since all UTF-32 is of length 1).
				512	This is called when we do not know if we are in UTF-32 mode. */
				513
				514	#define GETCHARLENTEST(c, eptr, len) \
				515	GETCHARTEST(c, eptr)
				516
				517	/* If the pointer is not at the start of a character, move it back until
				518	it is. This is called only in UTF-32 mode - we don't put a test within the
				519	macro because almost all calls are already within a block of UTF-32 only
				520	code.
				521
Elliott Hughes	4e19c8e	2022-04-15 15:11:02 -0700	[diff] [blame]	522	These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	523
				524	#define BACKCHAR(eptr) do { } while (0)
				525
				526	/* Same as above, just in the other direction. */
				527
				528	#define FORWARDCHAR(eptr) do { } while (0)
				529	#define FORWARDCHARTEST(eptr,end) do { } while (0)
				530
				531	/* Same as above, but it allows a fully customizable form. */
				532
				533	#define ACROSSCHAR(condition, eptr, action) do { } while (0)
				534
				535	/* Deposit a character into memory, returning the number of code units. */
				536
				537	#define PUTCHAR(c, p) (*p = c, 1)
				538
				539	#endif /* UTF-32 character handling */
				540	#endif /* SUPPORT_UNICODE */
				541
				542
				543	/* Mode-dependent macros that have the same definition in all modes. */
				544
				545	#define CU2BYTES(x) ((x)*((PCRE2_CODE_UNIT_WIDTH/8)))
				546	#define BYTES2CU(x) ((x)/((PCRE2_CODE_UNIT_WIDTH/8)))
				547	#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
				548	#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
				549
				550
				551	/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
				552
				553	/* NOTE: All these structures must start with a pcre2_memctl structure. The
				554	code that uses them is simpler because it assumes this. */
				555
				556	/* The real general context structure. At present it holds only data for custom
				557	memory control. */
				558
				559	typedef struct pcre2_real_general_context {
				560	pcre2_memctl memctl;
				561	} pcre2_real_general_context;
				562
				563	/* The real compile context structure */
				564
				565	typedef struct pcre2_real_compile_context {
				566	pcre2_memctl memctl;
				567	int (stack_guard)(uint32_t, void );
				568	void *stack_guard_data;
				569	const uint8_t *tables;
				570	PCRE2_SIZE max_pattern_length;
				571	uint16_t bsr_convention;
				572	uint16_t newline_convention;
				573	uint32_t parens_nest_limit;
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	574	uint32_t extra_options;
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	575	} pcre2_real_compile_context;
				576
				577	/* The real match context structure. */
				578
				579	typedef struct pcre2_real_match_context {
				580	pcre2_memctl memctl;
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	581	#ifdef SUPPORT_JIT
				582	pcre2_jit_callback jit_callback;
				583	void *jit_callback_data;
				584	#endif
				585	int (callout)(pcre2_callout_block , void *);
				586	void *callout_data;
Elliott Hughes	0c26e19	2019-08-07 12:24:46 -0700	[diff] [blame]	587	int (substitute_callout)(pcre2_substitute_callout_block , void *);
				588	void *substitute_callout_data;
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	589	PCRE2_SIZE offset_limit;
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	590	uint32_t heap_limit;
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	591	uint32_t match_limit;
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	592	uint32_t depth_limit;
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	593	} pcre2_real_match_context;
				594
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	595	/* The real convert context structure. */
				596
				597	typedef struct pcre2_real_convert_context {
				598	pcre2_memctl memctl;
				599	uint32_t glob_separator;
				600	uint32_t glob_escape;
				601	} pcre2_real_convert_context;
				602
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	603	/* The real compiled code structure. The type for the blocksize field is
				604	defined specially because it is required in pcre2_serialize_decode() when
				605	copying the size from possibly unaligned memory into a variable of the same
				606	type. Use a macro rather than a typedef to avoid compiler warnings when this
				607	file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
				608	largest lookbehind that is supported. (OP_REVERSE in a pattern has a 16-bit
				609	argument in 8-bit and 16-bit modes, so we need no more than a 16-bit field
				610	here.) */
				611
				612	#undef CODE_BLOCKSIZE_TYPE
				613	#define CODE_BLOCKSIZE_TYPE size_t
				614
				615	#undef LOOKBEHIND_MAX
				616	#define LOOKBEHIND_MAX UINT16_MAX
				617
				618	typedef struct pcre2_real_code {
				619	pcre2_memctl memctl; /* Memory control fields */
				620	const uint8_t tables; / The character tables */
				621	void executable_jit; / Pointer to JIT code */
				622	uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
				623	CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */
				624	uint32_t magic_number; /* Paranoid and endianness check */
				625	uint32_t compile_options; /* Options passed to pcre2_compile() */
				626	uint32_t overall_options; /* Options after processing the pattern */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	627	uint32_t extra_options; /* Taken from compile_context */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	628	uint32_t flags; /* Various state flags */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	629	uint32_t limit_heap; /* Limit set in the pattern */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	630	uint32_t limit_match; /* Limit set in the pattern */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	631	uint32_t limit_depth; /* Limit set in the pattern */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	632	uint32_t first_codeunit; /* Starting code unit */
				633	uint32_t last_codeunit; /* This codeunit must be seen */
				634	uint16_t bsr_convention; /* What \R matches */
				635	uint16_t newline_convention; /* What is a newline? */
				636	uint16_t max_lookbehind; /* Longest lookbehind (characters) */
				637	uint16_t minlength; /* Minimum length of match */
				638	uint16_t top_bracket; /* Highest numbered group */
				639	uint16_t top_backref; /* Highest numbered back reference */
				640	uint16_t name_entry_size; /* Size (code units) of table entries */
				641	uint16_t name_count; /* Number of name entries in the table */
				642	} pcre2_real_code;
				643
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	644	/* The real match data structure. Define ovector as large as it can ever
				645	actually be so that array bound checkers don't grumble. Memory for this
				646	structure is obtained by calling pcre2_match_data_create(), which sets the size
				647	as the offset of ovector plus a pair of elements for each capturable string, so
				648	the size varies from call to call. As the maximum number of capturing
				649	subpatterns is 65535 we must allow for 65536 strings to include the overall
				650	match. (See also the heapframe structure below.) */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	651
				652	typedef struct pcre2_real_match_data {
				653	pcre2_memctl memctl;
				654	const pcre2_real_code code; / The pattern used for the match */
				655	PCRE2_SPTR subject; /* The subject that was matched */
				656	PCRE2_SPTR mark; /* Pointer to last mark */
				657	PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
				658	PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
				659	PCRE2_SIZE startchar; /* Offset to starting code unit */
Elliott Hughes	0c26e19	2019-08-07 12:24:46 -0700	[diff] [blame]	660	uint8_t matchedby; /* Type of match (normal, JIT, DFA) */
				661	uint8_t flags; /* Various flags */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	662	uint16_t oveccount; /* Number of pairs */
				663	int rc; /* The return code from the match */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	664	PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	665	} pcre2_real_match_data;
				666
				667
				668	/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
				669
				670	/* These structures are not needed for pcre2test. */
				671
				672	#ifndef PCRE2_PCRE2TEST
				673
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	674	/* Structures for checking for mutual recursion when scanning compiled or
				675	parsed code. */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	676
				677	typedef struct recurse_check {
				678	struct recurse_check *prev;
				679	PCRE2_SPTR group;
				680	} recurse_check;
				681
/* Chain item for recursion checks that records a pointer to 32-bit data
(presumably a position in the parsed pattern; confirm at the call sites). */

typedef struct parsed_recurse_check {
  struct parsed_recurse_check *prev;  /* Previous item in the chain */
  uint32_t *groupptr;                 /* Group pointer recorded by this item */
} parsed_recurse_check;
				686
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	687	/* Structure for building a cache when filling in recursion offsets. */
				688
				689	typedef struct recurse_cache {
				690	PCRE2_SPTR group;
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	691	int groupnumber;
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	692	} recurse_cache;
				693
				694	/* Structure for maintaining a chain of pointers to the currently incomplete
				695	branches, for testing for left recursion while compiling. */
				696
				697	typedef struct branch_chain {
				698	struct branch_chain *outer;
				699	PCRE2_UCHAR *current_branch;
				700	} branch_chain;
				701
				702	/* Structure for building a list of named groups during the first pass of
				703	compiling. */
				704
				705	typedef struct named_group {
				706	PCRE2_SPTR name; /* Points to the name in the pattern */
				707	uint32_t number; /* Group number */
				708	uint16_t length; /* Length of the name */
				709	uint16_t isdup; /* TRUE if a duplicate */
				710	} named_group;
				711
				712	/* Structure for passing "static" information around between the functions
				713	doing the compiling, so that they are thread-safe. */
				714
				715	typedef struct compile_block {
				716	pcre2_real_compile_context cx; / Points to the compile context */
				717	const uint8_t lcc; / Points to lower casing table */
				718	const uint8_t fcc; / Points to case-flipping table */
				719	const uint8_t cbits; / Points to character type table */
				720	const uint8_t ctypes; / Points to table of type maps */
				721	PCRE2_SPTR start_workspace; /* The start of working space */
				722	PCRE2_SPTR start_code; /* The start of the compiled code */
				723	PCRE2_SPTR start_pattern; /* The start of the pattern */
				724	PCRE2_SPTR end_pattern; /* The end of the pattern */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	725	PCRE2_UCHAR name_table; / The name/number table */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	726	PCRE2_SIZE workspace_size; /* Size of workspace */
				727	PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
				728	PCRE2_SIZE erroroffset; /* Offset of error in pattern */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	729	uint16_t names_found; /* Number of entries so far */
				730	uint16_t name_entry_size; /* Size of each entry */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	731	uint16_t parens_depth; /* Depth of nested parentheses */
				732	uint16_t assert_depth; /* Depth of nested assertions */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	733	open_capitem open_caps; / Chain of open capture items */
				734	named_group named_groups; / Points to vector in pre-compile */
				735	uint32_t named_group_list_size; /* Number of entries in the list */
				736	uint32_t external_options; /* External (initial) options */
				737	uint32_t external_flags; /* External flag bits to be set */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	738	uint32_t bracount; /* Count of capturing parentheses */
				739	uint32_t lastcapture; /* Last capture encountered */
				740	uint32_t parsed_pattern; / Parsed pattern buffer */
				741	uint32_t parsed_pattern_end; / Parsed pattern should not get here */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	742	uint32_t groupinfo; / Group info vector */
				743	uint32_t top_backref; /* Maximum back reference */
				744	uint32_t backref_map; /* Bitmap of low back refs */
				745	uint32_t nltype; /* Newline type */
				746	uint32_t nllen; /* Newline string length */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	747	uint32_t class_range_start; /* Overall class range start */
				748	uint32_t class_range_end; /* Overall class range end */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	749	PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
Elliott Hughes	4e19c8e	2022-04-15 15:11:02 -0700	[diff] [blame]	750	uint32_t req_varyopt; /* "After variable item" flag for reqbyte */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	751	int max_lookbehind; /* Maximum lookbehind (characters) */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	752	BOOL had_accept; /* (ACCEPT) encountered /
				753	BOOL had_pruneorskip; /* (PRUNE) or (SKIP) encountered */
				754	BOOL had_recurse; /* Had a recursion or subroutine call */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	755	BOOL dupnames; /* Duplicate names exist */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	756	} compile_block;
				757
				758	/* Structure for keeping the properties of the in-memory stack used
				759	by the JIT matcher. */
				760
				761	typedef struct pcre2_real_jit_stack {
				762	pcre2_memctl memctl;
				763	void* stack;
				764	} pcre2_real_jit_stack;
				765
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	766	/* Structure for items in a linked list that represents an explicit recursive
Elliott Hughes	4e19c8e	2022-04-15 15:11:02 -0700	[diff] [blame]	767	call within the pattern when running pcre2_dfa_match(). */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	768
				769	typedef struct dfa_recursion_info {
				770	struct dfa_recursion_info *prevrec;
				771	PCRE2_SPTR subject_position;
				772	uint32_t group_num;
				773	} dfa_recursion_info;
				774
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	775	/* Structure for "stack" frames that are used for remembering backtracking
				776	positions during matching. As these are used in a vector, with the ovector item
				777	being extended, the size of the structure must be a multiple of PCRE2_SIZE. The
				778	only way to check this at compile time is to force an error by generating an
				779	array with a negative size. By putting this in a typedef (which is never used),
				780	we don't generate any code when all is well. */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	781
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	782	typedef struct heapframe {
				783
				784	/* The first set of fields are variables that have to be preserved over calls
				785	to RRMATCH(), but which do not need to be copied to new frames. */
				786
				787	PCRE2_SPTR ecode; /* The current position in the pattern */
				788	PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */
				789	PCRE2_SIZE length; /* Used for character, string, or code lengths */
				790	PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
				791	PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
				792	uint32_t rdepth; /* "Recursion" depth */
				793	uint32_t group_frame_type; /* Type information for group frames */
				794	uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */
				795	uint8_t return_id; /* Where to go on in internal "return" */
				796	uint8_t op; /* Processing opcode */
				797
Elliott Hughes	653c210	2019-01-09 15:41:36 -0800	[diff] [blame]	798	/* At this point, the structure is 16-bit aligned. On most architectures
				799	the alignment requirement for a pointer will ensure that the eptr field below
				800	is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
				801	that is 16-bit aligned. We must therefore ensure that what comes between here
				802	and eptr is an odd multiple of 16 bits so as to get back into 32-bit
				803	alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
				804	fudges in the other cases. In the 32-bit case the padding comes first so that
				805	the occu field itself is 32-bit aligned. Without the padding, this structure
				806	is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */
				807
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	808	#if PCRE2_CODE_UNIT_WIDTH == 8
				809	PCRE2_UCHAR occu[6]; /* Used for other case code units */
				810	#elif PCRE2_CODE_UNIT_WIDTH == 16
				811	PCRE2_UCHAR occu[2]; /* Used for other case code units */
Elliott Hughes	653c210	2019-01-09 15:41:36 -0800	[diff] [blame]	812	uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	813	#else
Elliott Hughes	653c210	2019-01-09 15:41:36 -0800	[diff] [blame]	814	uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	815	PCRE2_UCHAR occu[1]; /* Used for other case code units */
				816	#endif
				817
				818	/* The rest have to be copied from the previous frame whenever a new frame
				819	becomes current. The final field is specified as a large vector so that
				820	runtime array bound checks don't catch references to it. However, for any
				821	specific call to pcre2_match() the memory allocated for each frame structure
				822	allows for exactly the right size ovector for the number of capturing
				823	parentheses. (See also the comment for pcre2_real_match_data above.) */
				824
				825	PCRE2_SPTR eptr; /* MUST BE FIRST */
				826	PCRE2_SPTR start_match; /* Can be adjusted by \K */
				827	PCRE2_SPTR mark; /* Most recent mark on the success path */
				828	uint32_t current_recurse; /* Current (deepest) recursion number */
				829	uint32_t capture_last; /* Most recent capture */
				830	PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
				831	PCRE2_SIZE offset_top; /* Offset after highest capture */
				832	PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
				833	} heapframe;
				834
Elliott Hughes	653c210	2019-01-09 15:41:36 -0800	[diff] [blame]	835	/* This typedef is a check that the size of the heapframe structure is a
				836	multiple of PCRE2_SIZE. See various comments above. */
				837
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	838	typedef char check_heapframe_size[
				839	((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	840
Elliott Hughes	4e19c8e	2022-04-15 15:11:02 -0700	[diff] [blame]	841	/* Structure for computing the alignment of heapframe. */
				842
				843	typedef struct heapframe_align {
				844	char unalign; /* Completely unalign the current offset */
				845	heapframe frame; /* Offset is its alignment */
				846	} heapframe_align;
				847
				848	/* This define is the minimum alignment required for a heapframe, in bytes. */
				849
				850	#define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame)
				851
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	852	/* Structure for passing "static" information around between the functions
				853	doing traditional NFA matching (pcre2_match() and friends). */
				854
				855	typedef struct match_block {
				856	pcre2_memctl memctl; /* For general use */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	857	PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */
				858	heapframe match_frames; / Points to vector of frames */
				859	heapframe match_frames_top; / Points after the end of the vector */
				860	heapframe stack_frames; / The original vector on the stack */
				861	PCRE2_SIZE heap_limit; /* As it says */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	862	uint32_t match_limit; /* As it says */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	863	uint32_t match_limit_depth; /* As it says */
				864	uint32_t match_call_count; /* Number of times a new frame is created */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	865	BOOL hitend; /* Hit the end of the subject at some point */
				866	BOOL hasthen; /* Pattern contains (THEN) /
Elliott Hughes	2dbd7d2	2020-06-03 14:32:37 -0700	[diff] [blame]	867	BOOL allowemptypartial; /* Allow empty hard partial */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	868	const uint8_t lcc; / Points to lower casing table */
				869	const uint8_t fcc; / Points to case-flipping table */
				870	const uint8_t ctypes; / Points to table of type maps */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	871	PCRE2_SIZE start_offset; /* The start offset value */
				872	PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
				873	uint16_t partial; /* PARTIAL options */
				874	uint16_t bsr_convention; /* \R interpretation */
				875	uint16_t name_count; /* Number of names in name table */
				876	uint16_t name_entry_size; /* Size of entry in names table */
				877	PCRE2_SPTR name_table; /* Table of group names */
				878	PCRE2_SPTR start_code; /* For use when recursing */
				879	PCRE2_SPTR start_subject; /* Start of the subject string */
Elliott Hughes	2dbd7d2	2020-06-03 14:32:37 -0700	[diff] [blame]	880	PCRE2_SPTR check_subject; /* Where UTF-checked from */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	881	PCRE2_SPTR end_subject; /* End of the subject string */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	882	PCRE2_SPTR end_match_ptr; /* Subject position at end match */
				883	PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
				884	PCRE2_SPTR last_used_ptr; /* Latest consulted character */
				885	PCRE2_SPTR mark; /* Mark pointer to pass back on success */
				886	PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	887	PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
				888	PCRE2_SPTR verb_skip_ptr; /* For passing back a (SKIP) name /
				889	uint32_t verb_current_recurse; /* Current recurse when (VERB) happens /
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	890	uint32_t moptions; /* Match options */
				891	uint32_t poptions; /* Pattern options */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	892	uint32_t skip_arg_count; /* For counting SKIP_ARGs */
				893	uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	894	uint32_t nltype; /* Newline type */
				895	uint32_t nllen; /* Newline string length */
				896	PCRE2_UCHAR nl[4]; /* Newline string when fixed */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	897	pcre2_callout_block cb; / Points to a callout block */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	898	void callout_data; / To pass back to callouts */
				899	int (callout)(pcre2_callout_block ,void ); / Callout function or NULL */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	900	} match_block;
				901
				902	/* A similar structure is used for the same purpose by the DFA matching
				903	functions. */
				904
				905	typedef struct dfa_match_block {
				906	pcre2_memctl memctl; /* For general use */
				907	PCRE2_SPTR start_code; /* Start of the compiled pattern */
				908	PCRE2_SPTR start_subject ; /* Start of the subject string */
				909	PCRE2_SPTR end_subject; /* End of subject string */
				910	PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
				911	PCRE2_SPTR last_used_ptr; /* Latest consulted character */
				912	const uint8_t tables; / Character tables */
				913	PCRE2_SIZE start_offset; /* The start offset value */
Elliott Hughes	653c210	2019-01-09 15:41:36 -0800	[diff] [blame]	914	PCRE2_SIZE heap_limit; /* As it says */
				915	PCRE2_SIZE heap_used; /* As it says */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	916	uint32_t match_limit; /* As it says */
				917	uint32_t match_limit_depth; /* As it says */
				918	uint32_t match_call_count; /* Number of calls of internal function */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	919	uint32_t moptions; /* Match options */
				920	uint32_t poptions; /* Pattern options */
				921	uint32_t nltype; /* Newline type */
				922	uint32_t nllen; /* Newline string length */
Elliott Hughes	2dbd7d2	2020-06-03 14:32:37 -0700	[diff] [blame]	923	BOOL allowemptypartial; /* Allow empty hard partial */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	924	PCRE2_UCHAR nl[4]; /* Newline string when fixed */
				925	uint16_t bsr_convention; /* \R interpretation */
Elliott Hughes	9bc971b	2018-07-27 13:23:14 -0700	[diff] [blame]	926	pcre2_callout_block cb; / Points to a callout block */
Janis Danisevskis	53e448c	2016-03-31 13:35:25 +0100	[diff] [blame]	927	void callout_data; / To pass back to callouts */
				928	int (callout)(pcre2_callout_block ,void ); / Callout function or NULL */
				929	dfa_recursion_info recursive; / Linked list of recursion data */
				930	} dfa_match_block;
				931
				932	#endif /* PCRE2_PCRE2TEST */
				933
				934	/* End of pcre2_intmodedep.h */