blob: ea52a204bcad81e55efbd0cc3ebf6514bea0793b [file] [log] [blame]
Elliott Hughes5b808042021-10-01 10:56:10 -07001/*************************************************
2* PCRE2 testing program *
3*************************************************/
4
5/* PCRE2 is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language. In 2014
7the API was completely revised and '2' was added to the name, because the old
8API, which had lasted for 16 years, could not accommodate new requirements. At
9the same time, this testing program was re-designed because its original
10hacked-up (non-) design had also run out of steam.
11
12 Written by Philip Hazel
13 Original code Copyright (c) 1997-2012 University of Cambridge
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070014 Rewritten code Copyright (c) 2016-2022 University of Cambridge
Elliott Hughes5b808042021-10-01 10:56:10 -070015
16-----------------------------------------------------------------------------
17Redistribution and use in source and binary forms, with or without
18modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41POSSIBILITY OF SUCH DAMAGE.
42-----------------------------------------------------------------------------
43*/
44
45
46/* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47libraries in a single program, though its input and output are always 8-bit.
48It is different from modules such as pcre2_compile.c in the library itself,
49which are compiled separately for each code unit width. If two widths are
50enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51pcre2test.c is compiled only once, and linked with all the enabled libraries.
52Therefore, it must not make use of any of the macros from pcre2.h or
53pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55it references only the enabled library functions. */
56
57#ifdef HAVE_CONFIG_H
58#include "config.h"
59#endif
60
61#include <ctype.h>
62#include <stdio.h>
63#include <string.h>
64#include <stdlib.h>
65#include <time.h>
66#include <locale.h>
67#include <errno.h>
68
69#if defined NATIVE_ZOS
70#include "pcrzoscs.h"
71/* That header is not included in the main PCRE2 distribution because other
72apparatus is needed to compile pcre2test for z/OS. The header can be found in
73the special z/OS distribution, which is available from www.zaconsultants.net or
74from www.cbttape.org. */
75#endif
76
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80
81/* Debugging code enabler */
82
83/* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070085/* Both libreadline and libedit are optionally supported */
Elliott Hughes5b808042021-10-01 10:56:10 -070086#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
87#if defined(SUPPORT_LIBREADLINE)
88#include <readline/readline.h>
89#include <readline/history.h>
90#else
91#if defined(HAVE_EDITLINE_READLINE_H)
92#include <editline/readline.h>
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070093#elif defined(HAVE_EDIT_READLINE_READLINE_H)
94#include <edit/readline/readline.h>
Elliott Hughes5b808042021-10-01 10:56:10 -070095#else
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070096#include <readline.h>
97/* GNU readline defines this macro but libedit doesn't, if that ever changes
98this needs to be updated or the build could break */
99#ifdef RL_VERSION_MAJOR
100#include <history.h>
101#endif
Elliott Hughes5b808042021-10-01 10:56:10 -0700102#endif
103#endif
104#endif
105
/* Put the test for interactive input into a macro so that it can be changed if
required for different environments. The argument is a FILE pointer; the value
is whatever isatty() reports for that stream's descriptor. */

#define INTERACTIVE(f) isatty(fileno(f))
110
111
112/* ---------------------- System-specific definitions ---------------------- */
113
/* A number of things vary for Windows builds. Originally, pcretest opened its
input and output without "b"; then I was told that "b" was needed in some
environments, so it was added for release 5.0 to both the input and output. (It
makes no difference on Unix-like systems.) Later I was told that it is wrong
for the input on Windows. I've now abstracted the modes into macros that are
set here, to make it easier to fiddle with them, and removed "b" from the input
mode under Windows. The BINARY versions are used when saving/restoring compiled
patterns. */

#if defined(_WIN32) || defined(WIN32)
#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE          "r"
#define OUTPUT_MODE         "wb"
#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"

#ifndef isatty
#define isatty _isatty         /* This is what Windows calls them, I'm told, */
#endif                         /* though in some environments they seem to   */
                               /* be already defined, hence the #ifndefs.    */
#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

/* Not Windows */

#else
#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit().              */
#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O      */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
#else
#define INPUT_MODE          "rb"
#define OUTPUT_MODE         "wb"
#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
#endif
#endif
162
163/* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
164user [2] provided alternative code which worked better for him. I have
165commented out the original, but kept it around just in case. */
166
167#ifdef __VMS
168#include <ssdef.h>
169/* These two includes came from [2]. */
170#include descrip
171#include lib$routines
172/* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
173#endif
174
/* old VC and older compilers don't support %td or %zu, and even some that
claim to be C99 don't support it (hence DISABLE_PERCENT_ZT). PTR_FORM and
SIZ_FORM hold the conversion text that follows the '%': the C99 ptrdiff_t and
size_t forms when available, otherwise long or long long fallbacks. */

#if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
  (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L)))
#ifdef _WIN64
#define PTR_FORM "lld"
#define SIZ_FORM "llu"
#else
#define PTR_FORM "ld"
#define SIZ_FORM "lu"
#endif
#else
#define PTR_FORM "td"
#define SIZ_FORM "zu"
#endif
191
192/* ------------------End of system-specific definitions -------------------- */
193
/* Glueing macros that are used in several places below. glue() pastes its two
arguments exactly as written; G() adds a level of indirection so that macro
arguments are expanded before they are pasted. */

#define glue(a,b) a##b
#define G(a,b) glue(a,b)
198
/* Miscellaneous parameters and manifests */

/* Supply CLOCKS_PER_SEC where <time.h> has not: use CLK_TCK if that is
defined, otherwise fall back to 100. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif
208
#define CFORE_UNSET UINT32_MAX    /* Unset value for startend/cfail/cerror fields */
#define CONVERT_UNSET UINT32_MAX  /* Unset value for convert_type field */
#define DFA_WS_DIMENSION 1000     /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT 15      /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef    /* For initializing ovector */
#define LOCALESIZE 32             /* Size of locale name */
#define LOOPREPEAT 500000         /* Default loop count for timing */
#define MALLOCLISTSIZE 20         /* For remembering mallocs */
#define PARENS_NEST_DEFAULT 220   /* Default parentheses nest limit */
#define PATSTACKSIZE 20           /* Pattern stack for save/restore testing */
#define REPLACE_MODSIZE 100       /* Field for reading 8-bit replacement */
#define VERSION_SIZE 64           /* Size of buffer for the version strings */
221
/* Default JIT compile options: the union of the three PCRE2_JIT_xxx
compilation modes. */

#define JIT_DEFAULT (PCRE2_JIT_COMPLETE|\
                     PCRE2_JIT_PARTIAL_SOFT|\
                     PCRE2_JIT_PARTIAL_HARD)
227
/* Make sure the buffer into which replacement strings are copied is big enough
to hold them as 32-bit code units. */

#define REPLACE_BUFFSIZE 1024   /* This is a byte value */

/* Enforce the requirement stated above at compile time: each of the
REPLACE_MODSIZE 8-bit units may become one 4-byte code unit. */

#if defined(REPLACE_MODSIZE) && (REPLACE_BUFFSIZE < 4*REPLACE_MODSIZE)
#error "REPLACE_BUFFSIZE is too small to hold REPLACE_MODSIZE 32-bit code units"
#endif
232
/* Execution modes. Each value is the code unit width, in bits, that it
selects. */

#define PCRE8_MODE   8
#define PCRE16_MODE 16
#define PCRE32_MODE 32
238
/* Processing returns */

enum { PR_OK, PR_SKIP, PR_ABEND };
242
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK consults isprint() only when use_tables is non-NULL and the value is
below 256; in every other case it falls back to PRINTABLE. The argument is
evaluated more than once, so it must not have side effects. */

#define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
255
256/* We have to include some of the library source files because we need
257to use some of the macros, internal structure definitions, and other internal
258values - pcre2test has "inside information" compared to an application program
259that strictly follows the PCRE2 API.
260
261Before including pcre2_internal.h we define PRIV so that it does not get
262defined therein. This ensures that PRIV names in the included files do not
263clash with those in the libraries. Also, although pcre2_internal.h does itself
264include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
265so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
266for building the library. */
267
268#define PRIV(name) name
269#define PCRE2_CODE_UNIT_WIDTH 0
270#include "pcre2.h"
271#include "pcre2posix.h"
272#include "pcre2_internal.h"
273
274/* We need access to some of the data tables that PCRE2 uses. Defining
275PCRE2_PCRETEST makes some minor changes in the files. The previous definition
276of PRIV avoids name clashes. */
277
278#define PCRE2_PCRE2TEST
279#include "pcre2_tables.c"
280#include "pcre2_ucd.c"
281
/* 32-bit integer values in the input are read by strtoul() or strtol(). The
check needed for overflow depends on whether long ints are in fact longer than
ints. They are defined not to be shorter. The macro argument is parenthesized
so that any expression, including a conditional one, is tested as a whole. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
297
/* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
pcre2_intmodedep.h, which is where mode-dependent macros and structures are
defined. We can now include it for each supported code unit width. Because
PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
while including these files, and then restore it to a no-op. Because LINK_SIZE
may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
these inclusions should not be changed. */

#undef PCRE2_SUFFIX
#undef PCRE2_CODE_UNIT_WIDTH

#ifdef   SUPPORT_PCRE2_8
#define  PCRE2_CODE_UNIT_WIDTH 8
#define  PCRE2_SUFFIX(a) G(a,8)
#include "pcre2_intmodedep.h"
#include "pcre2_printint.c"
#undef   PCRE2_CODE_UNIT_WIDTH
#undef   PCRE2_SUFFIX
#endif   /* SUPPORT_PCRE2_8 */

#ifdef   SUPPORT_PCRE2_16
#define  PCRE2_CODE_UNIT_WIDTH 16
#define  PCRE2_SUFFIX(a) G(a,16)
#include "pcre2_intmodedep.h"
#include "pcre2_printint.c"
#undef   PCRE2_CODE_UNIT_WIDTH
#undef   PCRE2_SUFFIX
#endif   /* SUPPORT_PCRE2_16 */

#ifdef   SUPPORT_PCRE2_32
#define  PCRE2_CODE_UNIT_WIDTH 32
#define  PCRE2_SUFFIX(a) G(a,32)
#include "pcre2_intmodedep.h"
#include "pcre2_printint.c"
#undef   PCRE2_CODE_UNIT_WIDTH
#undef   PCRE2_SUFFIX
#endif   /* SUPPORT_PCRE2_32 */

/* Restore the no-op form now that all widths have been processed. */

#define PCRE2_SUFFIX(a) a
338
339/* We need to be able to check input text for UTF-8 validity, whatever code
340widths are actually available, because the input to pcre2test is always in
3418-bit code units. So we include the UTF validity checking function for 8-bit
342code units. */
343
344extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
345
346#define PCRE2_CODE_UNIT_WIDTH 8
347#undef PCRE2_SPTR
348#define PCRE2_SPTR PCRE2_SPTR8
349#include "pcre2_valid_utf.c"
350#undef PCRE2_CODE_UNIT_WIDTH
351#undef PCRE2_SPTR
352
/* If we have 8-bit support, default to it; if there is also 16-or 32-bit
support, it can be selected by a command-line option. If there is no 8-bit
support, there must be 16-bit or 32-bit support, so default to one of them. The
config function, JIT stack, contexts, and version string are the same in all
modes, so use the form of the first that is available. */

#if defined SUPPORT_PCRE2_8
#define DEFAULT_TEST_MODE PCRE8_MODE
#define VERSION_TYPE PCRE2_UCHAR8
#define PCRE2_CONFIG pcre2_config_8
#define PCRE2_JIT_STACK pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_8
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8

#elif defined SUPPORT_PCRE2_16
#define DEFAULT_TEST_MODE PCRE16_MODE
#define VERSION_TYPE PCRE2_UCHAR16
#define PCRE2_CONFIG pcre2_config_16
#define PCRE2_JIT_STACK pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_16
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16

#elif defined SUPPORT_PCRE2_32
#define DEFAULT_TEST_MODE PCRE32_MODE
#define VERSION_TYPE PCRE2_UCHAR32
#define PCRE2_CONFIG pcre2_config_32
#define PCRE2_JIT_STACK pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_32
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
#endif
389
/* ------------- Structure and table for handling #-commands ------------- */

/* One table entry: the command keyword and the CMD_xxx value it maps to. */

typedef struct cmdstruct {
  const char *name;
  int  value;
} cmdstruct;

/* Command identifiers. CMD_UNKNOWN is last and has no entry in cmdlist. */

enum { CMD_FORBID_UTF, CMD_LOAD, CMD_LOADTABLES, CMD_NEWLINE_DEFAULT,
  CMD_PATTERN, CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT,
  CMD_UNKNOWN };

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "loadtables",      CMD_LOADTABLES },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }};

/* Number of entries in cmdlist. */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdstruct))
414
415/* ------------- Structures and tables for handling modifiers -------------- */
416
/* Table of names for newline types. Must be kept in step with the definitions
of PCRE2_NEWLINE_xx in pcre2.h. */

static const char *newlines[] = {
  "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
422
423/* Structure and table for handling pattern conversion types. */
424
425typedef struct convertstruct {
426 const char *name;
427 uint32_t option;
428} convertstruct;
429
430static convertstruct convertlist[] = {
431 { "glob", PCRE2_CONVERT_GLOB },
432 { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
433 { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
434 { "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
435 { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
436 { "unset", CONVERT_UNSET }};
437
438#define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
439
/* Modifier types and applicability */

enum { MOD_CTC,    /* Applies to a compile context */
       MOD_CTM,    /* Applies to a match context */
       MOD_PAT,    /* Applies to a pattern */
       MOD_PATP,   /* Ditto, OK for Perl test */
       MOD_DAT,    /* Applies to a data line */
       MOD_DATP,   /* Ditto, OK for Perl test */
       MOD_PD,     /* Applies to a pattern or a data line */
       MOD_PDP,    /* As MOD_PD, OK for Perl test */
       MOD_PND,    /* As MOD_PD, but not for a default pattern */
       MOD_PNDP,   /* As MOD_PND, OK for Perl test */
       MOD_CHR,    /* Is a single character */
       MOD_CON,    /* Is a "convert" type/options list */
       MOD_CTL,    /* Is a control bit */
       MOD_BSR,    /* Is a BSR value */
       MOD_IN2,    /* Is one or two unsigned integers */
       MOD_INS,    /* Is a signed integer */
       MOD_INT,    /* Is an unsigned integer */
       MOD_IND,    /* Is an unsigned integer, but no value => default */
       MOD_NL,     /* Is a newline value */
       MOD_NN,     /* Is a number or a name; more than one may occur */
       MOD_OPT,    /* Is an option bit */
       MOD_SIZ,    /* Is a PCRE2_SIZE value */
       MOD_STR };  /* Is a string */
465
/* Control bits. Some apply to compiling, some to matching, but some can be set
either on a pattern or a data line, so they must all be distinct. There are now
so many of them that they are split into two fields. */

#define CTL_AFTERTEXT                    0x00000001u
#define CTL_ALLAFTERTEXT                 0x00000002u
#define CTL_ALLCAPTURES                  0x00000004u
#define CTL_ALLUSEDTEXT                  0x00000008u
#define CTL_ALTGLOBAL                    0x00000010u
#define CTL_BINCODE                      0x00000020u
#define CTL_CALLOUT_CAPTURE              0x00000040u
#define CTL_CALLOUT_INFO                 0x00000080u
#define CTL_CALLOUT_NONE                 0x00000100u
#define CTL_DFA                          0x00000200u
#define CTL_EXPAND                       0x00000400u
#define CTL_FINDLIMITS                   0x00000800u
#define CTL_FRAMESIZE                    0x00001000u
#define CTL_FULLBINCODE                  0x00002000u
#define CTL_GETALL                       0x00004000u
#define CTL_GLOBAL                       0x00008000u
#define CTL_HEXPAT                       0x00010000u  /* Same word as USE_LENGTH */
#define CTL_INFO                         0x00020000u
#define CTL_JITFAST                      0x00040000u
#define CTL_JITVERIFY                    0x00080000u
#define CTL_MARK                         0x00100000u
#define CTL_MEMORY                       0x00200000u
#define CTL_NULLCONTEXT                  0x00400000u
#define CTL_POSIX                        0x00800000u
#define CTL_POSIX_NOSUB                  0x01000000u
#define CTL_PUSH                         0x02000000u  /* These three must be */
#define CTL_PUSHCOPY                     0x04000000u  /* all in the same */
#define CTL_PUSHTABLESCOPY               0x08000000u  /* word. */
#define CTL_STARTCHAR                    0x10000000u
#define CTL_USE_LENGTH                   0x20000000u  /* Same word as HEXPAT */
#define CTL_UTF8_INPUT                   0x40000000u
#define CTL_ZERO_TERMINATE               0x80000000u

/* Combinations */

#define CTL_DEBUG            (CTL_FULLBINCODE|CTL_INFO)  /* For setting */
#define CTL_ANYINFO          (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO)
#define CTL_ANYGLOB          (CTL_ALTGLOBAL|CTL_GLOBAL)

/* Second control word */

#define CTL2_SUBSTITUTE_CALLOUT          0x00000001u
#define CTL2_SUBSTITUTE_EXTENDED         0x00000002u
#define CTL2_SUBSTITUTE_LITERAL          0x00000004u
#define CTL2_SUBSTITUTE_MATCHED          0x00000008u
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  0x00000010u
#define CTL2_SUBSTITUTE_REPLACEMENT_ONLY 0x00000020u
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET    0x00000040u
#define CTL2_SUBSTITUTE_UNSET_EMPTY      0x00000080u
#define CTL2_SUBJECT_LITERAL             0x00000100u
#define CTL2_CALLOUT_NO_WHERE            0x00000200u
#define CTL2_CALLOUT_EXTRA               0x00000400u
#define CTL2_ALLVECTOR                   0x00000800u
#define CTL2_NULL_SUBJECT                0x00001000u
#define CTL2_NULL_REPLACEMENT            0x00002000u

#define CTL2_NL_SET                      0x40000000u  /* Informational */
#define CTL2_BSR_SET                     0x80000000u  /* Informational */

/* These are the matching controls that may be set either on a pattern or on a
data line. They are copied from the pattern controls as initial settings for
data line controls. Note that CTL_MEMORY is not included here, because it does
different things in the two cases. */

#define CTL_ALLPD  (CTL_AFTERTEXT|\
                    CTL_ALLAFTERTEXT|\
                    CTL_ALLCAPTURES|\
                    CTL_ALLUSEDTEXT|\
                    CTL_ALTGLOBAL|\
                    CTL_GLOBAL|\
                    CTL_MARK|\
                    CTL_STARTCHAR|\
                    CTL_UTF8_INPUT)

#define CTL2_ALLPD (CTL2_SUBSTITUTE_CALLOUT|\
                    CTL2_SUBSTITUTE_EXTENDED|\
                    CTL2_SUBSTITUTE_LITERAL|\
                    CTL2_SUBSTITUTE_MATCHED|\
                    CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
                    CTL2_SUBSTITUTE_REPLACEMENT_ONLY|\
                    CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
                    CTL2_SUBSTITUTE_UNSET_EMPTY|\
                    CTL2_ALLVECTOR)
553
554/* Structures for holding modifier information for patterns and subject strings
555(data). Fields containing modifiers that can be set either for a pattern or a
556subject must be at the start and in the same order in both cases so that the
557same offset in the big table below works for both. */
558
559typedef struct patctl { /* Structure for pattern modifiers. */
560 uint32_t options; /* Must be in same position as datctl */
561 uint32_t control; /* Must be in same position as datctl */
562 uint32_t control2; /* Must be in same position as datctl */
563 uint32_t jitstack; /* Must be in same position as datctl */
564 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
565 uint32_t substitute_skip; /* Must be in same position as patctl */
566 uint32_t substitute_stop; /* Must be in same position as patctl */
567 uint32_t jit;
568 uint32_t stackguard_test;
569 uint32_t tables_id;
570 uint32_t convert_type;
571 uint32_t convert_length;
572 uint32_t convert_glob_escape;
573 uint32_t convert_glob_separator;
574 uint32_t regerror_buffsize;
575 uint8_t locale[LOCALESIZE];
576} patctl;
577
/* Dimensions used by the datctl copy/get fields: MAXCPYGET sizes the number
arrays, LENCPYGET sizes the name buffers. */

#define MAXCPYGET    10
#define LENCPYGET    64
580
581typedef struct datctl { /* Structure for data line modifiers. */
582 uint32_t options; /* Must be in same position as patctl */
583 uint32_t control; /* Must be in same position as patctl */
584 uint32_t control2; /* Must be in same position as patctl */
585 uint32_t jitstack; /* Must be in same position as patctl */
586 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
587 uint32_t substitute_skip; /* Must be in same position as patctl */
588 uint32_t substitute_stop; /* Must be in same position as patctl */
589 uint32_t startend[2];
590 uint32_t cerror[2];
591 uint32_t cfail[2];
592 int32_t callout_data;
593 int32_t copy_numbers[MAXCPYGET];
594 int32_t get_numbers[MAXCPYGET];
595 uint32_t oveccount;
596 uint32_t offset;
597 uint8_t copy_names[LENCPYGET];
598 uint8_t get_names[LENCPYGET];
599} datctl;
600
/* Ids for which context to modify. */

enum { CTX_PAT,            /* Active pattern context */
       CTX_POPPAT,         /* Ditto, for a popped pattern */
       CTX_DEFPAT,         /* Default pattern context */
       CTX_DAT,            /* Active data (match) context */
       CTX_DEFDAT };       /* Default data (match) context */
608
/* Macros to simplify the big table below. Each yields the byte offset of a
named field: CO in the compile context, MO in the match context, PO in patctl
and DO in datctl. PD is identical to PO. */

#define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
#define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
#define PO(name) offsetof(patctl, name)
#define PD(name) PO(name)
#define DO(name) offsetof(datctl, name)
616
617/* Table of all long-form modifiers. Must be in collating sequence of modifier
618name because it is searched by binary chop. */
619
620typedef struct modstruct {
621 const char *name;
622 uint16_t which;
623 uint16_t type;
624 uint32_t value;
625 PCRE2_SIZE offset;
626} modstruct;
627
628static modstruct modlist[] = {
629 { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
630 { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
631 { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
632 { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
633 { "allow_lookaround_bsk", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, CO(extra_options) },
634 { "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
635 { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
636 { "allvector", MOD_PND, MOD_CTL, CTL2_ALLVECTOR, PO(control2) },
637 { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
638 { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
639 { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
640 { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
641 { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
642 { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
643 { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
644 { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
645 { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
646 { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
647 { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
648 { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
649 { "callout_extra", MOD_DAT, MOD_CTL, CTL2_CALLOUT_EXTRA, DO(control2) },
650 { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
651 { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
652 { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) },
653 { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
654 { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
655 { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
656 { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
657 { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
658 { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
659 { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
660 { "copy_matched_subject", MOD_DAT, MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
661 { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
662 { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
663 { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
664 { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
665 { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
666 { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
667 { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
668 { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
669 { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
670 { "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
671 { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
672 { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
673 { "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) },
674 { "extra_alt_bsux", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALT_BSUX, CO(extra_options) },
675 { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
676 { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
677 { "framesize", MOD_PAT, MOD_CTL, CTL_FRAMESIZE, PO(control) },
678 { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
679 { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
680 { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
681 { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
682 { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
683 { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
684 { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
685 { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
686 { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
687 { "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) },
688 { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
689 { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) },
690 { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
691 { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
692 { "match_invalid_utf", MOD_PAT, MOD_OPT, PCRE2_MATCH_INVALID_UTF, PO(options) },
693 { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
694 { "match_line", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_LINE, CO(extra_options) },
695 { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
696 { "match_word", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_WORD, CO(extra_options) },
697 { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
698 { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
699 { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
700 { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
701 { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
702 { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
703 { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
704 { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
705 { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
706 { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
Elliott Hughes4e19c8e2022-04-15 15:11:02 -0700707 { "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) },
Elliott Hughes5b808042021-10-01 10:56:10 -0700708 { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
709 { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
710 { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
711 { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
712 { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
713 { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
714 { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
Elliott Hughes4e19c8e2022-04-15 15:11:02 -0700715 { "null_replacement", MOD_DAT, MOD_CTL, CTL2_NULL_REPLACEMENT, DO(control2) },
716 { "null_subject", MOD_DAT, MOD_CTL, CTL2_NULL_SUBJECT, DO(control2) },
Elliott Hughes5b808042021-10-01 10:56:10 -0700717 { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
718 { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
719 { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
720 { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
721 { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
722 { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
723 { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
724 { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
725 { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
726 { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) },
727 { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
728 { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
729 { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
730 { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
731 { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */
732 { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
733 { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
734 { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
735 { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
736 { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
737 { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
738 { "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) },
739 { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
740 { "substitute_literal", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_LITERAL, PO(control2) },
741 { "substitute_matched", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_MATCHED, PO(control2) },
742 { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
743 { "substitute_replacement_only", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) },
744 { "substitute_skip", MOD_PND, MOD_INT, 0, PO(substitute_skip) },
745 { "substitute_stop", MOD_PND, MOD_INT, 0, PO(substitute_stop) },
746 { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
747 { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
748 { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
749 { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
750 { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
751 { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
752 { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
753 { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
754 { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
755 { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
756};
757
/* Number of entries in modlist. The expansion is parenthesized so that it acts
as a single operand when the macro is used inside a larger expression. */
#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
759
760/* Controls and options that are supported for use with the POSIX interface. */
761
/* Compile-time option bits accepted with the POSIX interface. */

#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
  PCRE2_CASELESS  | \
  PCRE2_DOTALL    | \
  PCRE2_LITERAL   | \
  PCRE2_MULTILINE | \
  PCRE2_UCP       | \
  PCRE2_UTF       | \
  PCRE2_UNGREEDY)

/* No extra compile options are accepted. */

#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)

/* Compile-time control bits (first and second control words). */

#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_AFTERTEXT    | \
  CTL_ALLAFTERTEXT | \
  CTL_EXPAND       | \
  CTL_HEXPAT       | \
  CTL_POSIX        | \
  CTL_POSIX_NOSUB  | \
  CTL_USE_LENGTH)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)

/* Match-time option bits. */

#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
  PCRE2_NOTBOL   | \
  PCRE2_NOTEMPTY | \
  PCRE2_NOTEOL)

/* Match-time control bits (first and second control words). */

#define POSIX_SUPPORTED_MATCH_CONTROLS  (CTL_AFTERTEXT | CTL_ALLAFTERTEXT)
#define POSIX_SUPPORTED_MATCH_CONTROLS2 (CTL2_NULL_SUBJECT)
Elliott Hughes5b808042021-10-01 10:56:10 -0700779
780/* Control bits that are not ignored with 'push'. */
781
/* First-word and second-word control masks honoured together with 'push'. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_BINCODE        | \
  CTL_CALLOUT_INFO   | \
  CTL_FULLBINCODE    | \
  CTL_HEXPAT         | \
  CTL_INFO           | \
  CTL_JITVERIFY      | \
  CTL_MEMORY         | \
  CTL_FRAMESIZE      | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL2_BSR_SET | CTL2_NL_SET)
788
789/* Controls that apply only at compile time with 'push'. */
790
/* Both masks are parenthesized, like the other control masks in this file, so
that each behaves as a single operand under operators such as ~ and &. */
#define PUSH_COMPILE_ONLY_CONTROLS  (CTL_JITVERIFY)
#define PUSH_COMPILE_ONLY_CONTROLS2 (0)
793
794/* Controls that are forbidden with #pop or #popcopy. */
795
#define NOTPOP_CONTROLS ( \
  CTL_HEXPAT         | \
  CTL_POSIX          | \
  CTL_POSIX_NOSUB    | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)
798
799/* Pattern controls that are mutually exclusive. At present these are all in
800the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
801CTL_POSIX, so it doesn't need its own entries. */
802
803static uint32_t exclusive_pat_controls[] = {
804 CTL_POSIX | CTL_PUSH,
805 CTL_POSIX | CTL_PUSHCOPY,
806 CTL_POSIX | CTL_PUSHTABLESCOPY,
807 CTL_PUSH | CTL_PUSHCOPY,
808 CTL_PUSH | CTL_PUSHTABLESCOPY,
809 CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
810 CTL_EXPAND | CTL_HEXPAT };
811
812/* Data controls that are mutually exclusive. At present these are all in the
813first control word. */
814
815static uint32_t exclusive_dat_controls[] = {
816 CTL_ALLUSEDTEXT | CTL_STARTCHAR,
817 CTL_FINDLIMITS | CTL_NULLCONTEXT };
818
819/* Table of single-character abbreviated modifiers. The index field is
820initialized to -1, but the first time the modifier is encountered, it is filled
821in with the index of the full entry in modlist, to save repeated searching when
822processing multiple test items. This short list is searched serially, so its
823order does not matter. */
824
/* One single-character modifier abbreviation. */
struct c1modstruct {
  const char *fullname;   /* Name of the full modifier */
  uint32_t    onechar;    /* The one-character abbreviation */
  int         index;      /* Index in modlist, or -1 if not yet looked up */
};

typedef struct c1modstruct c1modstruct;
830
831static c1modstruct c1modlist[] = {
832 { "bincode", 'B', -1 },
833 { "info", 'I', -1 },
834 { "global", 'g', -1 },
835 { "caseless", 'i', -1 },
836 { "multiline", 'm', -1 },
837 { "no_auto_capture", 'n', -1 },
838 { "dotall", 's', -1 },
839 { "extended", 'x', -1 }
840};
841
/* Number of entries in c1modlist. The expansion is parenthesized so that it
acts as a single operand when the macro is used inside a larger expression. */
#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
843
844/* Table of arguments for the -C command line option. Use macros to make the
845table itself easier to read. */
846
/* Give SUPPORT_8, SUPPORT_16 and SUPPORT_32 a value in every build: 1 when
the matching SUPPORT_PCRE2_n setting is defined, otherwise 0 unless the macro
already has a definition, which is then left alone. */

#ifdef SUPPORT_PCRE2_8
#define SUPPORT_8 1
#elif !defined SUPPORT_8
#define SUPPORT_8 0
#endif

#ifdef SUPPORT_PCRE2_16
#define SUPPORT_16 1
#elif !defined SUPPORT_16
#define SUPPORT_16 0
#endif

#ifdef SUPPORT_PCRE2_32
#define SUPPORT_32 1
#elif !defined SUPPORT_32
#define SUPPORT_32 0
#endif
866
/* SUPPORT_EBCDIC is 1 in an EBCDIC build and 0 otherwise; EBCDIC_NL is
CHAR_LF in an EBCDIC build and 0 otherwise. */

#ifndef EBCDIC
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#else
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#endif
874
/* BACKSLASH_C is 1 unless NEVER_BACKSLASH_C is defined, in which case it
is 0. */

#ifndef NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
880
/* One entry in the table of -C arguments. */
struct coptstruct {
  const char *name;    /* Argument text */
  uint32_t    type;    /* One of the CONF_xxx values */
  uint32_t    value;   /* Fixed value or PCRE2_CONFIG_xxx code, per the type */
};

typedef struct coptstruct coptstruct;
886
/* Values for the type field of a coptstruct entry. */
enum {
  CONF_BSR = 0,
  CONF_FIX = 1,
  CONF_FIZ = 2,
  CONF_INT = 3,
  CONF_NL  = 4
};
893
894static coptstruct coptlist[] = {
895 { "backslash-C", CONF_FIX, BACKSLASH_C },
896 { "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
897 { "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
898 { "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
899 { "jit", CONF_INT, PCRE2_CONFIG_JIT },
900 { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE },
901 { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE },
902 { "pcre2-16", CONF_FIX, SUPPORT_16 },
903 { "pcre2-32", CONF_FIX, SUPPORT_32 },
904 { "pcre2-8", CONF_FIX, SUPPORT_8 },
905 { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
906};
907
/* Number of entries in coptlist. The expansion is parenthesized so that it
acts as a single operand when the macro is used inside a larger expression. */
#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))
909
910#undef SUPPORT_8
911#undef SUPPORT_16
912#undef SUPPORT_32
913#undef SUPPORT_EBCDIC
914
915
916/* ----------------------- Static variables ------------------------ */
917
918static FILE *infile;
919static FILE *outfile;
920
921static const void *last_callout_mark;
922static PCRE2_JIT_STACK *jit_stack = NULL;
923static size_t jit_stack_size = 0;
924
925static BOOL first_callout;
926static BOOL jit_was_used;
927static BOOL restrict_for_perl_test = FALSE;
928static BOOL show_memory = FALSE;
929
930static int code_unit_size; /* Bytes */
931static int jitrc; /* Return from JIT compile */
932static int test_mode = DEFAULT_TEST_MODE;
933static int timeit = 0;
934static int timeitm = 0;
935
936clock_t total_compile_time = 0;
937clock_t total_jit_compile_time = 0;
938clock_t total_match_time = 0;
939
940static uint32_t dfa_matched;
941static uint32_t forbid_utf = 0;
942static uint32_t maxlookbehind;
943static uint32_t max_oveccount;
944static uint32_t callout_count;
945static uint32_t maxcapcount;
946
947static uint16_t local_newline_default = 0;
948
949static VERSION_TYPE jittarget[VERSION_SIZE];
950static VERSION_TYPE version[VERSION_SIZE];
951static VERSION_TYPE uversion[VERSION_SIZE];
952
953static patctl def_patctl;
954static patctl pat_patctl;
955static datctl def_datctl;
956static datctl dat_datctl;
957
958static void *patstack[PATSTACKSIZE];
959static int patstacknext = 0;
960
961static void *malloclist[MALLOCLISTSIZE];
962static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
963static uint32_t malloclistptr = 0;
964
/* The regex_t object exists only when the 8-bit library is supported. */

#ifdef SUPPORT_PCRE2_8
static regex_t preg =
  { NULL, NULL, 0, 0, 0, 0 };
#endif

/* DFA workspace, character tables and locale name. */

static int           *dfa_workspace = NULL;
static const uint8_t *locale_tables = NULL;
static const uint8_t *use_tables = NULL;
static uint8_t        locale_name[32];
static uint8_t       *tables3 = NULL;         /* For binary-loaded tables */
static uint32_t       loadtables_length = 0;
975
976/* We need buffers for building 16/32-bit strings; 8-bit strings don't need
977rebuilding, but set up the same naming scheme for use in macros. The "buffer"
978buffer is where all input lines are read. Its size is the same as pbuffer8.
979Pattern lines are always copied to pbuffer8 for use in callouts, even if they
980are actually compiled from pbuffer16 or pbuffer32. */
981
static uint8_t *buffer = NULL;
static uint8_t *pbuffer8 = NULL;
static size_t   pbuffer8_size = 50000;   /* Initial size, bytes */

/* The dbuffer is where all processed data lines are put. In non-8-bit modes it
is cast as needed. For long data lines it grows as necessary. */

static uint8_t *dbuffer = NULL;
static size_t   dbuffer_size = 1u << 14; /* Initial size, bytes */
991
992
993/* ---------------- Mode-dependent variables -------------------*/
994
/* Variables for the 8-bit library, one declarator per line. */
#ifdef SUPPORT_PCRE2_8
static pcre2_code_8            *compiled_code8;
static pcre2_general_context_8 *general_context8;
static pcre2_general_context_8 *general_context_copy8;
static pcre2_compile_context_8 *pat_context8;
static pcre2_compile_context_8 *default_pat_context8;
static pcre2_convert_context_8 *con_context8;
static pcre2_convert_context_8 *default_con_context8;
static pcre2_match_context_8   *dat_context8;
static pcre2_match_context_8   *default_dat_context8;
static pcre2_match_data_8      *match_data8;
#endif
1003
/* Variables for the 16-bit library, one declarator per line. */
#ifdef SUPPORT_PCRE2_16
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static uint16_t                 *pbuffer16 = NULL;
static PCRE2_SIZE                pbuffer16_size = 0;  /* Set only when needed */
#endif
1014
/* Variables for the 32-bit library, one declarator per line. */
#ifdef SUPPORT_PCRE2_32
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static uint32_t                 *pbuffer32 = NULL;
static PCRE2_SIZE                pbuffer32_size = 0;  /* Set only when needed */
#endif
1025
1026
1027/* ---------------- Macros that work in all modes ----------------- */
1028
/* CAST8VAR casts the mode-selected variable to uint8_t *. SET and SETPLUS
assign to, or add to, the mode-selected variable via SETOP. strlen8 is strlen
for a pointer that is not char *; its argument is parenthesized so that the
cast applies to the whole argument expression. */
#define CAST8VAR(x) CASTVAR(uint8_t *, x)
#define SET(x,y) SETOP(x,y,=)
#define SETPLUS(x,y) SETOP(x,y,+=)
#define strlen8(x) strlen((char *)(x))
1033
1034
1035/* ---------------- Mode-dependent, runtime-testing macros ------------------*/
1036
1037/* Define macros for variables and functions that must be selected dynamically
1038depending on the mode setting (8, 16, 32). These are dependent on which modes
1039are supported. */
1040
1041#if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
1042 defined (SUPPORT_PCRE2_32)) >= 2
1043
1044/* ----- All three modes supported ----- */
1045
1046#if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
1047
/* Read field 'fld' through G(var,8), G(var,16) or G(var,32), as selected by
test_mode, and cast the result to 'type'. */
#define CASTFLD(type,var,fld) ( \
  (test_mode == PCRE8_MODE)?  (type)(G(var,8)->fld) : \
  (test_mode == PCRE16_MODE)? (type)(G(var,16)->fld) : \
                              (type)(G(var,32)->fld))
1050
/* Cast G(var,8), G(var,16) or G(var,32), as selected by test_mode, to
'type'. */
#define CASTVAR(type,var) \
  ((test_mode == PCRE8_MODE)? (type)G(var,8) : \
   (test_mode == PCRE16_MODE)? (type)G(var,16) : \
   (type)G(var,32))
1054
/* Fetch element 'idx' of 'ptr' as a uint32_t, treating 'ptr' as a pointer to
8-bit, 16-bit or 32-bit code units according to test_mode. */
#define CODE_UNIT(ptr,idx) \
  ((test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(ptr))[idx]) : \
   (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(ptr))[idx]) : \
   (uint32_t)(((PCRE2_SPTR32)(ptr))[idx]))
1059
/* Copy one convert context over another with memcpy, using the 8-bit, 16-bit
or 32-bit objects and context size according to test_mode. */
#define CONCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8),G(src,8),sizeof(pcre2_convert_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16),G(src,16),sizeof(pcre2_convert_context_16)); \
  else \
    memcpy(G(dst,32),G(src,32),sizeof(pcre2_convert_context_32))
1066
/* Copy 'count' code units from 'src' into G(dst,8), G(dst,16) or G(dst,32);
the byte count is scaled by 1, 2 or 4 according to test_mode. Nothing is
copied if test_mode is none of the three modes. */
#define CONVERT_COPY(dst,src,count) \
  if (test_mode == PCRE8_MODE) memcpy(G(dst,8),(char *)src,count); \
  else if (test_mode == PCRE16_MODE) memcpy(G(dst,16),(char *)src,(count)*2); \
  else if (test_mode == PCRE32_MODE) memcpy(G(dst,32),(char *)src,(count)*4)
1074
/* Copy one match context over another with memcpy, using the 8-bit, 16-bit
or 32-bit objects and context size according to test_mode. */
#define DATCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8),G(src,8),sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16),G(src,16),sizeof(pcre2_match_context_16)); \
  else \
    memcpy(G(dst,32),G(src,32),sizeof(pcre2_match_context_32))
1081
/* Field 'fld' read through G(var,8), G(var,16) or G(var,32), as selected by
test_mode. */
#define FLD(var,fld) ( \
  (test_mode == PCRE8_MODE)?  G(var,8)->fld : \
  (test_mode == PCRE16_MODE)? G(var,16)->fld : \
                              G(var,32)->fld)
1084
/* Copy one compile context over another with memcpy, using the 8-bit, 16-bit
or 32-bit objects and context size according to test_mode. */
#define PATCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8),G(src,8),sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16),G(src,16),sizeof(pcre2_compile_context_16)); \
  else \
    memcpy(G(dst,32),G(src,32),sizeof(pcre2_compile_context_32))
1091
/* Call pchars32, pchars16 or pchars8 on 'ptr' plus 'off', according to
test_mode, and store the result in 'lv'. The 32-bit mode is tested first and
the 8-bit function is the fallback. */
#define PCHARS(lv, ptr, off, count, isutf, dest) \
  if (test_mode == PCRE32_MODE) \
    lv = pchars32((PCRE2_SPTR32)(ptr)+off, count, isutf, dest); \
  else if (test_mode == PCRE16_MODE) \
    lv = pchars16((PCRE2_SPTR16)(ptr)+off, count, isutf, dest); \
  else \
    lv = pchars8((PCRE2_SPTR8)(ptr)+off, count, isutf, dest)
1099
/* As PCHARS, but the value returned by pchars32, pchars16 or pchars8 is
discarded. */
#define PCHARSV(ptr, off, count, isutf, dest) \
  if (test_mode == PCRE32_MODE) \
    (void)pchars32((PCRE2_SPTR32)(ptr)+off, count, isutf, dest); \
  else if (test_mode == PCRE16_MODE) \
    (void)pchars16((PCRE2_SPTR16)(ptr)+off, count, isutf, dest); \
  else \
    (void)pchars8((PCRE2_SPTR8)(ptr)+off, count, isutf, dest)
1107
/* Call pcre2_callout_enumerate_8/_16/_32 on the current compiled code for the
mode given by test_mode, casting 'fn' to that mode's callback type, and store
the result in 'rc'. */
#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_callout_enumerate_8(compiled_code8, \
      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))fn,data); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_callout_enumerate_16(compiled_code16, \
      (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))fn,data); \
  else \
    rc = pcre2_callout_enumerate_32(compiled_code32, \
      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))fn,data)
1118
/* Assign to G(code,8), G(code,16) or G(code,32) the result of
pcre2_code_copy_8/_16/_32 applied to 'src', according to test_mode. */
#define PCRE2_CODE_COPY_FROM_VOID(code,src) \
  if (test_mode == PCRE8_MODE) G(code,8) = pcre2_code_copy_8(src); \
  else if (test_mode == PCRE16_MODE) G(code,16) = pcre2_code_copy_16(src); \
  else G(code,32) = pcre2_code_copy_32(src)
1126
/* Assign to 'dst', as a void pointer, the result of pcre2_code_copy_8/_16/_32
applied to G(code,8), G(code,16) or G(code,32), according to test_mode. */
#define PCRE2_CODE_COPY_TO_VOID(dst,code) \
  if (test_mode == PCRE8_MODE) dst = (void *)pcre2_code_copy_8(G(code,8)); \
  else if (test_mode == PCRE16_MODE) dst = (void *)pcre2_code_copy_16(G(code,16)); \
  else dst = (void *)pcre2_code_copy_32(G(code,32))
1134
/* Assign to 'dst', as a void pointer, the result of
pcre2_code_copy_with_tables_8/_16/_32 applied to the mode-selected code. */
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,code) \
  if (test_mode == PCRE8_MODE) \
    dst = (void *)pcre2_code_copy_with_tables_8(G(code,8)); \
  else if (test_mode == PCRE16_MODE) \
    dst = (void *)pcre2_code_copy_with_tables_16(G(code,16)); \
  else \
    dst = (void *)pcre2_code_copy_with_tables_32(G(code,32))
1142
/* Call pcre2_compile_8/_16/_32 on the mode-selected pattern buffer and store
the result in the mode-selected 'code' variable. */
#define PCRE2_COMPILE(code,pat,len,opts,errp,offp,ctx) \
  if (test_mode == PCRE8_MODE) \
    G(code,8) = pcre2_compile_8(G(pat,8),len,opts,errp,offp,ctx); \
  else if (test_mode == PCRE16_MODE) \
    G(code,16) = pcre2_compile_16(G(pat,16),len,opts,errp,offp,ctx); \
  else \
    G(code,32) = pcre2_compile_32(G(pat,32),len,opts,errp,offp,ctx)
1150
/* Call pcre2_converted_pattern_free_8/_16/_32 on 'pat', cast to the code unit
pointer type for the mode given by test_mode. */
#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  if (test_mode == PCRE8_MODE) \
    pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)pat); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)pat); \
  else \
    pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)pat)
1155
/* Call pcre2_dfa_match_8/_16/_32 with the mode-selected code and match data,
casting the subject to that mode's string type; the result goes in 'rc'. */
#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,ctx,ws,wscount) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_dfa_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
      G(mdata,8),ctx,ws,wscount); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_dfa_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
      G(mdata,16),ctx,ws,wscount); \
  else \
    rc = pcre2_dfa_match_32(G(code,32),(PCRE2_SPTR32)subj,len,start,opts, \
      G(mdata,32),ctx,ws,wscount)
1163
/* Call pcre2_get_error_message_8/_16/_32 for 'errcode' with the mode-selected
buffer and its size expression; the result goes in 'rc'. */
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_get_error_message_8(errcode,G(buf,8),G(G(buf,8),_size)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_get_error_message_16(errcode,G(buf,16),G(G(buf,16),_size/2)); \
  else \
    rc = pcre2_get_error_message_32(errcode,G(buf,32),G(G(buf,32),_size/4))
1171
/* Store in 'count' the result of pcre2_get_ovector_count_8/_16/_32 for the
mode-selected match data. */
#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  if (test_mode == PCRE8_MODE) count = pcre2_get_ovector_count_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) count = pcre2_get_ovector_count_16(G(mdata,16)); \
  else count = pcre2_get_ovector_count_32(G(mdata,32))
1179
/* Store in 'start' the result of pcre2_get_startchar_8/_16/_32 for the
mode-selected match data. */
#define PCRE2_GET_STARTCHAR(start,mdata) \
  if (test_mode == PCRE8_MODE) start = pcre2_get_startchar_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) start = pcre2_get_startchar_16(G(mdata,16)); \
  else start = pcre2_get_startchar_32(G(mdata,32))
1187
/* Call pcre2_jit_compile_8/_16/_32 on the mode-selected code with 'opts' and
store the result in 'rc'. */
#define PCRE2_JIT_COMPILE(rc,code,opts) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_jit_compile_8(G(code,8),opts); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_jit_compile_16(G(code,16),opts); \
  else \
    rc = pcre2_jit_compile_32(G(code,32),opts)
1192
/* Call pcre2_jit_free_unused_memory_8/_16/_32 with the mode-selected
context. */
#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_free_unused_memory_8(G(ctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_free_unused_memory_16(G(ctx,16)); \
  else \
    pcre2_jit_free_unused_memory_32(G(ctx,32))
1197
/* Call pcre2_jit_match_8/_16/_32 with the mode-selected code and match data,
casting the subject to that mode's string type; the result goes in 'rc'. */
#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,ctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_jit_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
      G(mdata,8),ctx); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_jit_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
      G(mdata,16),ctx); \
  else \
    rc = pcre2_jit_match_32(G(code,32),(PCRE2_SPTR32)subj,len,start,opts, \
      G(mdata,32),ctx)
1205
/* Call pcre2_jit_stack_create_8/_16/_32 according to test_mode and store the
result, cast to PCRE2_JIT_STACK *, in 'a'. Like the other statement macros
here, the expansion has no trailing semicolon: the caller supplies it, which
keeps the macro usable as the body of an if that has an else. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
1213
/* Call pcre2_jit_stack_assign_8/_16/_32 with the mode-selected context,
casting 'b' to that mode's JIT callback type. Like the other statement macros
here, the expansion has no trailing semicolon: the caller supplies it, which
keeps the macro usable as the body of an if that has an else. */
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
1221
/* Call pcre2_jit_stack_free_8/_16/_32 on 'a', cast to the JIT stack type for
the mode given by test_mode. Like the other statement macros here, the
expansion has no trailing semicolon: the caller supplies it, which keeps the
macro usable as the body of an if that has an else. */
#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1229
/* Store in 'tables' the result of pcre2_maketables_8/_16/_32, called with a
NULL argument, according to test_mode. */
#define PCRE2_MAKETABLES(tables) \
  if (test_mode == PCRE8_MODE) \
    tables = pcre2_maketables_8(NULL); \
  else if (test_mode == PCRE16_MODE) \
    tables = pcre2_maketables_16(NULL); \
  else \
    tables = pcre2_maketables_32(NULL)
1234
/* Call pcre2_match_8/_16/_32 with the mode-selected code and match data,
casting the subject to that mode's string type; the result goes in 'rc'. */
#define PCRE2_MATCH(rc,code,subj,len,start,opts,mdata,ctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
      G(mdata,8),ctx); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
      G(mdata,16),ctx); \
  else \
    rc = pcre2_match_32(G(code,32),(PCRE2_SPTR32)subj,len,start,opts, \
      G(mdata,32),ctx)
1242
/* Assign to the mode-selected 'mdata' variable the result of
pcre2_match_data_create_8/_16/_32 called with 'ovecsize' and 'gctx'. */
#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  if (test_mode == PCRE8_MODE) G(mdata,8) = pcre2_match_data_create_8(ovecsize,gctx); \
  else if (test_mode == PCRE16_MODE) G(mdata,16) = pcre2_match_data_create_16(ovecsize,gctx); \
  else G(mdata,32) = pcre2_match_data_create_32(ovecsize,gctx)
1250
/* Assign to the mode-selected 'mdata' variable the result of
pcre2_match_data_create_from_pattern_8/_16/_32 for the mode-selected code. */
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == PCRE8_MODE) \
    G(mdata,8) = pcre2_match_data_create_from_pattern_8(G(code,8),gctx); \
  else if (test_mode == PCRE16_MODE) \
    G(mdata,16) = pcre2_match_data_create_from_pattern_16(G(code,16),gctx); \
  else \
    G(mdata,32) = pcre2_match_data_create_from_pattern_32(G(code,32),gctx)
1258
/* Call pcre2_match_data_free_8/_16/_32 on the mode-selected match data. */
#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == PCRE8_MODE) pcre2_match_data_free_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) pcre2_match_data_free_16(G(mdata,16)); \
  else pcre2_match_data_free_32(G(mdata,32))
1266
/* Call pcre2_pattern_convert_8/_16/_32 with the mode-selected pattern buffer
and convert context, casting 'outp' to that mode's buffer pointer type; the
result goes in 'rc'. */
#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,outp,outlenp,ctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_pattern_convert_8(G(pat,8),len,opts, \
      (PCRE2_UCHAR8 **)outp,outlenp,G(ctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_pattern_convert_16(G(pat,16),len,opts, \
      (PCRE2_UCHAR16 **)outp,outlenp,G(ctx,16)); \
  else \
    rc = pcre2_pattern_convert_32(G(pat,32),len,opts, \
      (PCRE2_UCHAR32 **)outp,outlenp,G(ctx,32))
1274
/* Call pcre2_pattern_info_8/_16/_32 on the mode-selected code with 'what' and
'where'; the result goes in 'rc'. */
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == PCRE8_MODE) rc = pcre2_pattern_info_8(G(code,8),what,where); \
  else if (test_mode == PCRE16_MODE) rc = pcre2_pattern_info_16(G(code,16),what,where); \
  else rc = pcre2_pattern_info_32(G(code,32),what,where)
1282
/* Call pcre2_printint_8/_16/_32 on the current compiled code for the mode
given by test_mode, passing outfile and 'arg'. */
#define PCRE2_PRINTINT(arg) \
  if (test_mode == PCRE8_MODE) pcre2_printint_8(compiled_code8,outfile,arg); \
  else if (test_mode == PCRE16_MODE) pcre2_printint_16(compiled_code16,outfile,arg); \
  else pcre2_printint_32(compiled_code32,outfile,arg)
1290
/* Call pcre2_serialize_decode_8/_16/_32, casting 'codes' to that mode's code
pointer array type and passing the mode-selected general context; the result
goes in 'rc'. */
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_serialize_decode_8((pcre2_code_8 **)codes,count,bytes, \
      G(gctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_serialize_decode_16((pcre2_code_16 **)codes,count,bytes, \
      G(gctx,16)); \
  else \
    rc = pcre2_serialize_decode_32((pcre2_code_32 **)codes,count,bytes, \
      G(gctx,32))
1298
/* Call pcre2_serialize_encode_8/_16/_32, casting 'codes' to that mode's const
code pointer array type and passing the mode-selected general context; the
result goes in 'rc'. */
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesp,sizep,gctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_serialize_encode_8((const pcre2_code_8 **)codes,count, \
      bytesp,sizep,G(gctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_serialize_encode_16((const pcre2_code_16 **)codes,count, \
      bytesp,sizep,G(gctx,16)); \
  else \
    rc = pcre2_serialize_encode_32((const pcre2_code_32 **)codes,count, \
      bytesp,sizep,G(gctx,32))
1306
/* Call pcre2_serialize_free_8/_16/_32 on 'bytes', according to test_mode. */
#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == PCRE8_MODE) pcre2_serialize_free_8(bytes); \
  else if (test_mode == PCRE16_MODE) pcre2_serialize_free_16(bytes); \
  else pcre2_serialize_free_32(bytes)
1314
/* Store in 'r' the result of pcre2_serialize_get_number_of_codes_8/_16/_32
applied to 'a', according to test_mode. The expansion ends without a semicolon
or a line continuation: the caller supplies the semicolon, as with the other
statement macros here, and the definition no longer absorbs the line that
follows it. */
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1322
/* Call pcre2_set_callout_8/_16/_32 with the mode-selected context, casting
'b' to that mode's callout function type. Like the other statement macros
here, the expansion has no trailing semicolon: the caller supplies it, which
keeps the macro usable as the body of an if that has an else. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1330
/* Call pcre2_set_character_tables_8/_16/_32 with the mode-selected context
and 'tables'. */
#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  if (test_mode == PCRE8_MODE) pcre2_set_character_tables_8(G(ctx,8),tables); \
  else if (test_mode == PCRE16_MODE) pcre2_set_character_tables_16(G(ctx,16),tables); \
  else pcre2_set_character_tables_32(G(ctx,32),tables)
1338
/* Call pcre2_set_compile_recursion_guard_8/_16/_32 with the mode-selected
context, 'guard' and 'data'. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_compile_recursion_guard_8(G(ctx,8),guard,data); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_compile_recursion_guard_16(G(ctx,16),guard,data); \
  else \
    pcre2_set_compile_recursion_guard_32(G(ctx,32),guard,data)
1346
/* Call pcre2_set_depth_limit_8/_16/_32 with the mode-selected context and
'limit'. */
#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) pcre2_set_depth_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) pcre2_set_depth_limit_16(G(ctx,16),limit); \
  else pcre2_set_depth_limit_32(G(ctx,32),limit)
1354
/* Call pcre2_set_glob_separator_8/_16/_32 with the mode-selected context and
'sep'; the result goes in 'rc'. */
#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,sep) \
  if (test_mode == PCRE8_MODE) rc = pcre2_set_glob_separator_8(G(ctx,8),sep); \
  else if (test_mode == PCRE16_MODE) rc = pcre2_set_glob_separator_16(G(ctx,16),sep); \
  else rc = pcre2_set_glob_separator_32(G(ctx,32),sep)
1362
/* Call pcre2_set_glob_escape_8/_16/_32 with the mode-selected context and
'esc'; the result goes in 'rc'. */
#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,esc) \
  if (test_mode == PCRE8_MODE) rc = pcre2_set_glob_escape_8(G(ctx,8),esc); \
  else if (test_mode == PCRE16_MODE) rc = pcre2_set_glob_escape_16(G(ctx,16),esc); \
  else rc = pcre2_set_glob_escape_32(G(ctx,32),esc)
1370
/* Call pcre2_set_heap_limit_8/_16/_32 with the mode-selected context and
'limit'. */
#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) pcre2_set_heap_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) pcre2_set_heap_limit_16(G(ctx,16),limit); \
  else pcre2_set_heap_limit_32(G(ctx,32),limit)
1378
/* Call pcre2_set_match_limit_8/_16/_32 with the mode-selected context and
'limit'. */
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) pcre2_set_match_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) pcre2_set_match_limit_16(G(ctx,16),limit); \
  else pcre2_set_match_limit_32(G(ctx,32),limit)
1386
/* Call pcre2_set_max_pattern_length_8/_16/_32 with the mode-selected context
and 'maxlen'. */
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,maxlen) \
  if (test_mode == PCRE8_MODE) pcre2_set_max_pattern_length_8(G(ctx,8),maxlen); \
  else if (test_mode == PCRE16_MODE) pcre2_set_max_pattern_length_16(G(ctx,16),maxlen); \
  else pcre2_set_max_pattern_length_32(G(ctx,32),maxlen)
1394
/* Call pcre2_set_offset_limit_8/_16/_32 with the mode-selected context and
'limit'. */
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) pcre2_set_offset_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) pcre2_set_offset_limit_16(G(ctx,16),limit); \
  else pcre2_set_offset_limit_32(G(ctx,32),limit)
1402
/* Call pcre2_set_parens_nest_limit_8/_16/_32 with the mode-selected context
and 'limit'. */
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) pcre2_set_parens_nest_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) pcre2_set_parens_nest_limit_16(G(ctx,16),limit); \
  else pcre2_set_parens_nest_limit_32(G(ctx,32),limit)
1410
/* Call pcre2_set_substitute_callout_8/_16/_32 with the mode-selected context,
casting 'fn' to that mode's substitute callout function type. */
#define PCRE2_SET_SUBSTITUTE_CALLOUT(ctx,fn,data) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_substitute_callout_8(G(ctx,8), \
      (int (*)(pcre2_substitute_callout_block_8 *, void *))fn,data); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_substitute_callout_16(G(ctx,16), \
      (int (*)(pcre2_substitute_callout_block_16 *, void *))fn,data); \
  else \
    pcre2_set_substitute_callout_32(G(ctx,32), \
      (int (*)(pcre2_substitute_callout_block_32 *, void *))fn,data)
1421
/* Call pcre2_substitute_8/_16/_32 with the mode-selected code and match data,
casting the subject, replacement and output buffer to that mode's string
types; the result goes in 'rc'. */
#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,mdata,ctx,rep,replen,out,outlenp) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substitute_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
      G(mdata,8),ctx,(PCRE2_SPTR8)rep,replen,(PCRE2_UCHAR8 *)out,outlenp); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substitute_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
      G(mdata,16),ctx,(PCRE2_SPTR16)rep,replen,(PCRE2_UCHAR16 *)out,outlenp); \
  else \
    rc = pcre2_substitute_32(G(code,32),(PCRE2_SPTR32)subj,len,start,opts, \
      G(mdata,32),ctx,(PCRE2_SPTR32)rep,replen,(PCRE2_UCHAR32 *)out,outlenp)
1432
/* Call pcre2_substring_copy_byname_8/_16/_32 with the mode-selected match data
and name, casting 'buf' to that mode's buffer type; the result goes in 'rc'. */
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,buflenp) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_copy_byname_8(G(mdata,8),G(name,8), \
      (PCRE2_UCHAR8 *)buf,buflenp); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_copy_byname_16(G(mdata,16),G(name,16), \
      (PCRE2_UCHAR16 *)buf,buflenp); \
  else \
    rc = pcre2_substring_copy_byname_32(G(mdata,32),G(name,32), \
      (PCRE2_UCHAR32 *)buf,buflenp)
1440
/* Call pcre2_substring_copy_bynumber_8/_16/_32 with the mode-selected match
data and 'num', casting 'buf' to that mode's buffer type; the result goes in
'rc'. */
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,buflenp) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_copy_bynumber_8(G(mdata,8),num, \
      (PCRE2_UCHAR8 *)buf,buflenp); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_copy_bynumber_16(G(mdata,16),num, \
      (PCRE2_UCHAR16 *)buf,buflenp); \
  else \
    rc = pcre2_substring_copy_bynumber_32(G(mdata,32),num, \
      (PCRE2_UCHAR32 *)buf,buflenp)
1448
/* Call pcre2_substring_free_8/_16/_32 on 'str', cast to the code unit pointer
type for the mode given by test_mode. */
#define PCRE2_SUBSTRING_FREE(str) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_free_8((PCRE2_UCHAR8 *)str); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)str); \
  else \
    pcre2_substring_free_32((PCRE2_UCHAR32 *)str)
1454
/* Call pcre2_substring_get_byname_8/_16/_32 with the mode-selected match data
and name, casting 'bufp' to that mode's buffer pointer type; the result goes
in 'rc'. */
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufp,buflenp) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_get_byname_8(G(mdata,8),G(name,8), \
      (PCRE2_UCHAR8 **)bufp,buflenp); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_get_byname_16(G(mdata,16),G(name,16), \
      (PCRE2_UCHAR16 **)bufp,buflenp); \
  else \
    rc = pcre2_substring_get_byname_32(G(mdata,32),G(name,32), \
      (PCRE2_UCHAR32 **)bufp,buflenp)
1462
/* Call pcre2_substring_get_bynumber_8/_16/_32 with the mode-selected match
data and 'num', casting 'bufp' to that mode's buffer pointer type; the result
goes in 'rc'. */
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufp,buflenp) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_get_bynumber_8(G(mdata,8),num, \
      (PCRE2_UCHAR8 **)bufp,buflenp); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_get_bynumber_16(G(mdata,16),num, \
      (PCRE2_UCHAR16 **)bufp,buflenp); \
  else \
    rc = pcre2_substring_get_bynumber_32(G(mdata,32),num, \
      (PCRE2_UCHAR32 **)bufp,buflenp)
1470
/* Call pcre2_substring_length_byname_8/_16/_32 with the mode-selected match
data and name, and 'lenp'; the result goes in 'rc'. */
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenp) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_length_byname_8(G(mdata,8),G(name,8),lenp); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_length_byname_16(G(mdata,16),G(name,16),lenp); \
  else \
    rc = pcre2_substring_length_byname_32(G(mdata,32),G(name,32),lenp)
1478
/* Call pcre2_substring_length_bynumber_8/_16/_32 with the mode-selected match
data, 'num' and 'lenp'; the result goes in 'rc'. */
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenp) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_length_bynumber_8(G(mdata,8),num,lenp); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_length_bynumber_16(G(mdata,16),num,lenp); \
  else \
    rc = pcre2_substring_length_bynumber_32(G(mdata,32),num,lenp)
1486
/* Call pcre2_substring_list_get_8/_16/_32 with the mode-selected match data,
casting 'listp' to that mode's list pointer type; the result goes in 'rc'. */
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listp,lengthsp) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_list_get_8(G(mdata,8), \
      (PCRE2_UCHAR8 ***)listp,lengthsp); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_list_get_16(G(mdata,16), \
      (PCRE2_UCHAR16 ***)listp,lengthsp); \
  else \
    rc = pcre2_substring_list_get_32(G(mdata,32), \
      (PCRE2_UCHAR32 ***)listp,lengthsp)
1494
/* Call pcre2_substring_list_free_8/_16/_32 on 'list', cast to the string
pointer array type for the mode given by test_mode. */
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == PCRE8_MODE) pcre2_substring_list_free_8((PCRE2_SPTR8 *)list); \
  else if (test_mode == PCRE16_MODE) pcre2_substring_list_free_16((PCRE2_SPTR16 *)list); \
  else pcre2_substring_list_free_32((PCRE2_SPTR32 *)list)
1502
/* Call pcre2_substring_number_from_name_8/_16/_32 with the mode-selected code
and name; the result goes in 'rc'. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_number_from_name_8(G(code,8),G(name,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_number_from_name_16(G(code,16),G(name,16)); \
  else \
    rc = pcre2_substring_number_from_name_32(G(code,32),G(name,32))
1510
/* G(var,8), G(var,16) or G(var,32), as selected by test_mode, cast to
void *. */
#define PTR(var) \
  ((test_mode == PCRE8_MODE)? (void *)G(var,8) : \
   (test_mode == PCRE16_MODE)? (void *)G(var,16) : (void *)G(var,32))
1515
/* Assign 'val' to field 'fld', reached through G(var,8), G(var,16) or
G(var,32) as selected by test_mode. */
#define SETFLD(var,fld,val) \
  if (test_mode == PCRE8_MODE) \
    G(var,8)->fld = val; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16)->fld = val; \
  else \
    G(var,32)->fld = val
1520
/* Assign 'val' to element 'idx' of array field 'fld', reached through
G(var,8), G(var,16) or G(var,32) as selected by test_mode. */
#define SETFLDVEC(var,fld,idx,val) \
  if (test_mode == PCRE8_MODE) \
    G(var,8)->fld[idx] = val; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16)->fld[idx] = val; \
  else \
    G(var,32)->fld[idx] = val
1525
/* Apply assignment operator 'op' with right-hand side 'val' to G(var,8),
G(var,16) or G(var,32), as selected by test_mode. */
#define SETOP(var,val,op) \
  if (test_mode == PCRE8_MODE) \
    G(var,8) op val; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16) op val; \
  else \
    G(var,32) op val
1530
/* Assign 'val' to G(var,8), G(var,16) or G(var,32), as selected by test_mode,
after casting it to uint8_t *, uint16_t * or uint32_t * respectively. */
#define SETCASTPTR(var,val) \
  if (test_mode == PCRE8_MODE) G(var,8) = (uint8_t *)(val); \
  else if (test_mode == PCRE16_MODE) G(var,16) = (uint16_t *)(val); \
  else G(var,32) = (uint32_t *)(val)
1538
/* Length of 'p' as an int, obtained from strlen, strlen16 or strlen32
according to test_mode. */
#define STRLEN(p) ( \
  (test_mode == PCRE8_MODE)?  ((int)strlen((char *)p)) : \
  (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \
                              ((int)strlen32((PCRE2_SPTR32)p)))
1542
1543#define SUB1(a,b) \
1544 if (test_mode == PCRE8_MODE) G(a,8)(G(b,8)); \
1545 else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16)); \
1546 else G(a,32)(G(b,32))
1547
1548#define SUB2(a,b,c) \
1549 if (test_mode == PCRE8_MODE) G(a,8)(G(b,8),G(c,8)); \
1550 else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16),G(c,16)); \
1551 else G(a,32)(G(b,32),G(c,32))
1552
1553#define TEST(x,r,y) ( \
1554 (test_mode == PCRE8_MODE && G(x,8) r (y)) || \
1555 (test_mode == PCRE16_MODE && G(x,16) r (y)) || \
1556 (test_mode == PCRE32_MODE && G(x,32) r (y)))
1557
1558#define TESTFLD(x,f,r,y) ( \
1559 (test_mode == PCRE8_MODE && G(x,8)->f r (y)) || \
1560 (test_mode == PCRE16_MODE && G(x,16)->f r (y)) || \
1561 (test_mode == PCRE32_MODE && G(x,32)->f r (y)))
1562
1563
1564/* ----- Two out of three modes are supported ----- */
1565
1566#else
1567
1568/* We can use some macro trickery to make a single set of definitions work in
1569the three different cases. */
1570
1571/* ----- 32-bit and 16-bit but not 8-bit supported ----- */
1572
/* Select the pair of widths. BITONE is always the wider of the two and
BITTWO the narrower; the final #else assumes the 16/8 pair, so it is only
correct when exactly two of the SUPPORT_PCRE2_xx macros are defined. */

#if defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_16)
#define BITONE 32
#define BITTWO 16

/* ----- 32-bit and 8-bit but not 16-bit supported ----- */

#elif defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_8)
#define BITONE 32
#define BITTWO 8

/* ----- 16-bit and 8-bit but not 32-bit supported ----- */

#else
#define BITONE 16
#define BITTWO 8
#endif
1589
1590
1591/* ----- Common macros for two-mode cases ----- */
1592
/* Bytes per code unit for each of the two supported widths. */

#define BYTEONE (BITONE/8)
#define BYTETWO (BITTWO/8)

/* In all the two-mode macros, G(G(PCRE,BITONE),_MODE) builds the mode
constant for the first width (for example PCRE16_MODE when BITONE is 16).
When test_mode equals it the BITONE variant is used, otherwise the BITTWO
variant. Statement-style macros are unbraced if/else pairs with no final
semicolon; the caller supplies it. */

#define CASTFLD(t,a,b) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? (t)(G(a,BITONE)->b) : \
    (t)(G(a,BITTWO)->b))

#define CASTVAR(t,x) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (t)G(x,BITONE) : (t)G(x,BITTWO))

/* Code unit number b of string a, widened to uint32_t. */

#define CODE_UNIT(a,b) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
  (uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) : \
  (uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b]))

#define CONCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_convert_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_convert_context_,BITTWO)))

/* Copy c code units from b. This is an expression, not an if/else, so the
whole conditional is parenthesised to keep it intact when the macro is used
inside a larger expression. */

#define CONVERT_COPY(a,b,c) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
  memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \
  memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO))

#define DATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_match_context_,BITTWO)))

#define FLD(a,b) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? G(a,BITONE)->b : G(a,BITTWO)->b)

#define PATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO)))

/* Call pchars<width>() on p+offset; PCHARS stores the result in lv, PCHARSV
discards it. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
  else \
    lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
  else \
    (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1646
/* Enumerate the callouts of the current compiled pattern (compiled_code<n>),
storing the result in a. The function stem carries its trailing underscore,
like every other wrapper in this set, so that the pasted name is
pcre2_callout_enumerate_<width>, matching the name the single-mode
definitions call directly. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
     a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
     a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1654
/* Two-mode wrappers for code copying, compiling, DFA/JIT matching and the
simple "get" functions. Arguments that name a per-width variable are passed
through G(x,BITONE)/G(x,BITTWO); the remaining arguments are passed as
written. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_code_copy_,BITONE)(b); \
  else \
    G(a,BITTWO) = G(pcre2_code_copy_,BITTWO)(b)

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (void *)G(pcre2_code_copy_,BITONE)(G(b,BITONE)); \
  else \
    a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO))

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \
  else \
    a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO))

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \
  else \
    G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,g)

#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_converted_pattern_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
  else \
    G(pcre2_converted_pattern_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
      G(g,BITONE),h,i,j); \
  else \
    a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
      G(g,BITTWO),h,i,j)

/* The last argument pastes to <b><width>_size/BYTExxx: the nested G() glues
"_size" onto the buffer name and the division then converts that value to a
count of code units. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE),G(G(b,BITONE),_size/BYTEONE)); \
  else \
    r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size/BYTETWO))

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_get_ovector_count_,BITONE)(G(b,BITONE)); \
  else \
    a = G(pcre2_get_ovector_count_,BITTWO)(G(b,BITTWO))

#define PCRE2_GET_STARTCHAR(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_get_startchar_,BITONE)(G(b,BITONE)); \
  else \
    a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO))

#define PCRE2_JIT_COMPILE(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \
  else \
    r = G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)(G(a,BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO))

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_jit_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
      G(g,BITONE),h); \
  else \
    a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
      G(g,BITTWO),h)
1730
/* JIT stack wrappers. Unlike most macros in this set, these three end with
their own semicolon, so a call site's semicolon becomes an empty statement
and they cannot be the unbraced body of an if that has an else. The
semicolons are kept because the call sites are not visible from this
section. The line continuation that used to follow the last line of
PCRE2_JIT_STACK_CREATE has been removed: it spliced the following line into
the macro and was harmless only while that line stayed blank. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d);

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c);

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a);
1748
/* Two-mode wrappers for table creation, matching, match data handling,
pattern conversion/information, debug printing and serialization. */

#define PCRE2_MAKETABLES(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_maketables_,BITONE)(NULL); \
  else \
    a = G(pcre2_maketables_,BITTWO)(NULL)

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
      G(g,BITONE),h); \
  else \
    a = G(pcre2_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
      G(g,BITTWO),h)

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_match_data_create_,BITONE)(b,c); \
  else \
    G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,c)

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),c); \
  else \
    G(a,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)(G(b,BITTWO),c)

#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)(G(a,BITONE)); \
  else \
    G(pcre2_match_data_free_,BITTWO)(G(a,BITTWO))

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_pattern_convert_,BITONE)(G(b,BITONE),c,d,(G(PCRE2_UCHAR,BITONE) **)e,f,G(g,BITONE)); \
  else \
    a = G(pcre2_pattern_convert_,BITTWO)(G(b,BITTWO),c,d,(G(PCRE2_UCHAR,BITTWO) **)e,f,G(g,BITTWO))

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_pattern_info_,BITONE)(G(b,BITONE),c,d); \
  else \
    a = G(pcre2_pattern_info_,BITTWO)(G(b,BITTWO),c,d)

/* Always operates on compiled_code<width> and writes to outfile. */

#define PCRE2_PRINTINT(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,a); \
  else \
    G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,a)

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_serialize_decode_,BITONE)((G(pcre2_code_,BITONE) **)a,b,c,G(d,BITONE)); \
  else \
    r = G(pcre2_serialize_decode_,BITTWO)((G(pcre2_code_,BITTWO) **)a,b,c,G(d,BITTWO))

/* G(const pcre2_code_,n) pastes the width onto the last token only, giving
"const pcre2_code_<n>". */

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_serialize_encode_,BITONE)((G(const pcre2_code_,BITONE) **)a,b,c,d,G(e,BITONE)); \
  else \
    r = G(pcre2_serialize_encode_,BITTWO)((G(const pcre2_code_,BITTWO) **)a,b,c,d,G(e,BITTWO))

#define PCRE2_SERIALIZE_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(a); \
  else \
    G(pcre2_serialize_free_,BITTWO)(a)

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_serialize_get_number_of_codes_,BITONE)(a); \
  else \
    r = G(pcre2_serialize_get_number_of_codes_,BITTWO)(a)
1822
/* Install callout function b with user data c in the width-specific context
a<n>. The expansion deliberately has no trailing semicolon, matching the
8-bit-only definition and PCRE2_SET_SUBSTITUTE_CALLOUT: the caller's own
semicolon ends the statement, so the macro can be followed by an "else". */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1830
/* Two-mode wrappers for the context "set" functions. In each, a names the
per-width context variable; the two glob setters also store the function's
result in r. */

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b,c); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c)

#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_depth_limit_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \
  else \
    r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \
  else \
    r = G(pcre2_set_glob_separator_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_HEAP_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_substitute_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_substitute_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))b,c)
1898
/* Two-mode wrappers for substitution and for the substring extraction
functions. Result codes are stored in a; buffer and string arguments are cast
to the pointer type of the selected width. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
      G(g,BITONE),h,(G(PCRE2_SPTR,BITONE))i,j, \
      (G(PCRE2_UCHAR,BITONE) *)k,l); \
  else \
    a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
      G(g,BITTWO),h,(G(PCRE2_SPTR,BITTWO))i,j, \
      (G(PCRE2_UCHAR,BITTWO) *)k,l)

#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
      (G(PCRE2_UCHAR,BITONE) *)d,e); \
  else \
    a = G(pcre2_substring_copy_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\
      (G(PCRE2_UCHAR,BITTWO) *)d,e)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_copy_bynumber_,BITONE)(G(b,BITONE),c,\
      (G(PCRE2_UCHAR,BITONE) *)d,e); \
  else \
    a = G(pcre2_substring_copy_bynumber_,BITTWO)(G(b,BITTWO),c,\
      (G(PCRE2_UCHAR,BITTWO) *)d,e)

#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
  else G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)

#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_get_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
      (G(PCRE2_UCHAR,BITONE) **)d,e); \
  else \
    a = G(pcre2_substring_get_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\
      (G(PCRE2_UCHAR,BITTWO) **)d,e)

#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_get_bynumber_,BITONE)(G(b,BITONE),c,\
      (G(PCRE2_UCHAR,BITONE) **)d,e); \
  else \
    a = G(pcre2_substring_get_bynumber_,BITTWO)(G(b,BITTWO),c,\
      (G(PCRE2_UCHAR,BITTWO) **)d,e)

#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_length_byname_,BITONE)(G(b,BITONE),G(c,BITONE),d); \
  else \
    a = G(pcre2_substring_length_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),d)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_length_bynumber_,BITONE)(G(b,BITONE),c,d); \
  else \
    a = G(pcre2_substring_length_bynumber_,BITTWO)(G(b,BITTWO),c,d)

#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_list_get_,BITONE)(G(b,BITONE), \
      (G(PCRE2_UCHAR,BITONE) ***)c,d); \
  else \
    a = G(pcre2_substring_list_get_,BITTWO)(G(b,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) ***)c,d)

#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)a); \
  else \
    G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)a)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_number_from_name_,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    a = G(pcre2_substring_number_from_name_,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1977
/* Two-mode versions of the generic variable/field helpers. */

/* The active-width variable x<n>, as a void pointer. */

#define PTR(x) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? (void *)G(x,BITONE) : \
  (void *)G(x,BITTWO))

/* Assign z to field y (or element v of field y) of the active-width x. */

#define SETFLD(x,y,z) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y = z; \
  else G(x,BITTWO)->y = z

#define SETFLDVEC(x,y,v,z) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y[v] = z; \
  else G(x,BITTWO)->y[v] = z

/* Apply operator z with operand y to the active-width x; the operator is the
third argument. */

#define SETOP(x,y,z) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE) z y; \
  else G(x,BITTWO) z y

/* The cast type is built as uint<width>_t. */

#define SETCASTPTR(x,y) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(x,BITONE) = (G(G(uint,BITONE),_t) *)(y); \
  else \
    G(x,BITTWO) = (G(G(uint,BITTWO),_t) *)(y)

/* Calls strlen<width>(). NOTE(review): when BITTWO is 8 this needs a function
or macro named strlen8 to be in scope, and unlike the other STRLEN variants in
this file the result is not cast to int -- confirm both are intended. */

#define STRLEN(p) ((test_mode == G(G(PCRE,BITONE),_MODE))? \
  G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p) : \
  G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p))

/* Call the width-suffixed function a<n> on the width-suffixed argument. */

#define SUB1(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO))
2009
/* Call the width-suffixed function a<n> with the two width-suffixed arguments
b<n> and c<n>. The stray ")" that followed each G(a,...) has been removed; it
unbalanced the parentheses, so any use of this macro in a two-mode build
could not compile. */

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
2015
/* Boolean expressions: compare the active-width variable x (or its field f)
with y using relational operator r. Both mode constants are checked
explicitly, so the result is false if test_mode matches neither width. */

#define TEST(x,r,y) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE) r (y)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO) r (y)))

#define TESTFLD(x,f,r,y) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE)->f r (y)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO)->f r (y)))
2023
2024
2025#endif /* Two out of three modes */
2026
2027/* ----- End of cases where more than one mode is supported ----- */
2028
2029
2030/* ----- Only 8-bit mode is supported ----- */
2031
2032#elif defined SUPPORT_PCRE2_8
/* With only the 8-bit library available no run-time selection is needed:
each macro expands directly to the _8 function or to the <name>8 variable.
Except where noted the expansions carry no trailing semicolon; the caller
supplies it. */

#define CASTFLD(t,a,b) (t)(G(a,8)->b)
#define CASTVAR(t,x) (t)G(x,8)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b])
#define CONCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8))
#define CONVERT_COPY(a,b,c) memcpy(G(a,8),(char *)b, c)
#define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8))
#define FLD(a,b) G(a,8)->b
#define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8))
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j)
/* The last argument pastes to <b>8_size. */
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a,8),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
/* The three JIT stack macros end with their own semicolon; it is kept because
the call sites are not visible from this section. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a);
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_8(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_8(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_8(a)
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_8(G(a,8),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_8(G(a,8), \
    (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
    (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_8(G(b,8),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_8((PCRE2_SPTR8 *)a)
/* No trailing semicolon here (one was removed): the multi-mode definitions of
this macro have none, so call sites already supply it, and an embedded one
would break "if (x) MACRO(...); else ...". */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
#define PTR(x) (void *)G(x,8)
#define SETFLD(x,y,z) G(x,8)->y = z
#define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z
#define SETOP(x,y,z) G(x,8) z y
#define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y)
#define STRLEN(p) (int)strlen((char *)p)
#define SUB1(a,b) G(a,8)(G(b,8))
#define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8))
#define TEST(x,r,y) (G(x,8) r (y))
#define TESTFLD(x,f,r,y) (G(x,8)->f r (y))
2135
2136
2137/* ----- Only 16-bit mode is supported ----- */
2138
2139#elif defined SUPPORT_PCRE2_16
/* With only the 16-bit library available no run-time selection is needed:
each macro expands directly to the _16 function or to the <name>16 variable.
Except where noted the expansions carry no trailing semicolon; the caller
supplies it. */

#define CASTFLD(t,a,b) (t)(G(a,16)->b)
#define CASTVAR(t,x) (t)G(x,16)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
#define CONCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16))
/* c is a count of code units; each is 2 bytes. */
#define CONVERT_COPY(a,b,c) memcpy(G(a,16),(char *)b, (c)*2)
#define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
#define FLD(a,b) G(a,16)->b
#define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_16(compiled_code16, \
    (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j)
/* The last argument pastes to <b>16_size/2, converting that value to a count
of code units. */
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
/* The three JIT stack macros end with their own semicolon; it is kept because
the call sites are not visible from this section. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a);
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_16(a)
/* No trailing semicolon here (one was removed), matching the 8-bit-only
definition, so the macro can be followed by "else". */
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_16(G(a,16), \
    (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
    (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_16(G(b,16),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_16((PCRE2_SPTR16 *)a)
/* No trailing semicolon here (one was removed): the multi-mode definitions of
this macro have none, so call sites already supply it. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_16(G(b,16),G(c,16))
#define PTR(x) (void *)G(x,16)
#define SETFLD(x,y,z) G(x,16)->y = z
#define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
#define SETOP(x,y,z) G(x,16) z y
#define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
#define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
#define SUB1(a,b) G(a,16)(G(b,16))
#define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
#define TEST(x,r,y) (G(x,16) r (y))
#define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
2242
2243
2244/* ----- Only 32-bit mode is supported ----- */
2245
2246#elif defined SUPPORT_PCRE2_32
/* Field/variable access and context-copy helpers for the 32-bit-only build.
Every variable name is passed through G(x,32); G() is defined outside this
section (presumably it pastes the mode suffix onto the name - confirm).
CONVERT_COPY multiplies its count by 4, the size in bytes of one 32-bit code
unit. */

#define CASTFLD(t,a,b) (t)(G(a,32)->b)
#define CASTVAR(t,x) (t)G(x,32)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
#define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
#define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))

/* Printing helpers: PCHARS stores the count returned by pchars32() in lv,
PCHARSV discards it. */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)

/* Wrappers that forward to the pcre2_..._32() library entry points. Where a
macro takes a result parameter (a or r) it expands to an assignment to that
parameter. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_32(compiled_code32, \
    (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)

/* The third argument expands to <b>32_size/4: the size variable belonging to
the buffer, divided by 4. Presumably the size is held in bytes and the
library wants a count of 32-bit code units - confirm against the buffer's
declaration. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)

/* NOTE(review): the three JIT stack macros below already end in a semicolon,
unlike most macros in this section, so a call site that adds its own produces
an empty statement. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_32(a)

/* Context setters. Only the two glob setters capture a result (in r). */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_32(G(a,32), \
    (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)

/* Substitution and substring extraction. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
    (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
/* Assigns to a the result of pcre2_substring_copy_bynumber_32() for the
32-bit variant of match data b, group number c, output buffer d and length
pointer e. The expansion deliberately has no trailing semicolon: the call site
supplies it, exactly as it must for the 16-bit-only definition of this macro.
A semicolon here would add an empty statement and break use as the body of an
if that is followed by else. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
/* Remaining substring wrappers for the 32-bit-only build. Macros with a
result parameter (a) expand to an assignment to it. */

#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)

/* NOTE(review): this expansion already ends in a semicolon, so a call site
that adds its own produces an empty statement. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));

/* Generic helpers for reaching the 32-bit variant of a variable: pointer
conversion, field and vector-element assignment, applying an operator, a
function call through the suffixed name, and comparisons. */

#define PTR(x) (void *)G(x,32)
#define SETFLD(x,y,z) G(x,32)->y = z
#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
#define SETOP(x,y,z) G(x,32) z y
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
#define SUB1(a,b) G(a,32)(G(b,32))
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
#define TEST(x,r,y) (G(x,32) r (y))
#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2349
2350#endif
2351
2352/* ----- End of mode-specific function call macros ----- */
2353
2354
2355
2356
2357/*************************************************
2358* Alternate character tables *
2359*************************************************/
2360
2361/* By default, the "tables" pointer in the compile context when calling
2362pcre2_compile() is not set (= NULL), thereby using the default tables of the
2363library. However, the tables modifier can be used to select alternate sets of
2364tables, for different kinds of testing. Note that the locale modifier also
2365adjusts the tables. */
2366
2367/* This is the set of tables distributed as default with PCRE2. It recognizes
2368only ASCII characters. */
2369
/* Layout: 256 bytes of lower-casing table, 256 bytes of case-flipping table,
ten 32-byte class bitmaps (320 bytes), then 256 bytes of per-character type
flags. */

static const uint8_t tables1[] = {

/* This table is a lower casing table. Entries 65-90 (A-Z) hold 97-122; every
other entry holds its own index. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. Entries 65-90 hold 97-122 and entries
97-122 hold 65-90; every other entry holds its own index. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. The
ten maps below appear in that order. */

/* space: 9-13 and 32 */

  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* xdigit: 0-9, A-F, a-f */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* digit: 0-9 */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* upper: A-Z */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* lower: a-z */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* word: 0-9, A-Z, '_' and a-z */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* graph: 33-126 */

  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* print: 32-126 */

  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* punct */

  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* cntrl: 0-31 and 127 */

  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2538
2539/* This is a set of tables that came originally from a Windows user. It seems
2540to be at least an approximation of ISO 8859. In particular, there are
2541characters greater than 128 that are marked as spaces, letters, etc. */
2542
/* The array holds 1088 bytes. The section comments below assume it follows
the same layout as tables1 (256 lower-casing + 256 case-flipping + 10*32
class bitmaps + 256 type flags); the values are consistent with that, but
confirm against the code that consumes these tables. */

static const uint8_t tables2[] = {

/* Lower casing table (presumed). Besides A-Z, entries 192-222 hold 224-254,
except that 215 holds itself. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table (presumed). Entries 192-222 and 224-254 are swapped
with each other, except that 215, 223, 247 and 255 hold themselves. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bitmaps, 32 bytes each (presumed to be in the same order as
in tables1: space, xdigit, digit, upper, lower, word, graph, print, punct,
cntrl). */

/* space (presumed) */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,

/* xdigit (presumed) */

0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,

/* digit (presumed) */

0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,

/* upper (presumed) */

0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,

/* lower (presumed) */

0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,

/* word (presumed) */

0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,

/* graph (presumed) */

0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,

/* print (presumed) */

0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,

/* punct (presumed) */

0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,

/* cntrl (presumed) */

255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Per-character type flags (presumed to use the same bit meanings as the
final section of tables1). Unlike tables1, some entries above 127 are
non-zero. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
2681
2682
2683
2684#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
2685/*************************************************
2686* Emulated memmove() for systems without it *
2687*************************************************/
2688
2689/* This function can make use of bcopy() if it is available. Otherwise do it by
2690steam, as there are some non-Unix environments that lack both memmove() and
2691bcopy(). */
2692
static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
/* bcopy() takes its source first and copes with overlap itself. */
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t k;

if (to > from)
  {
  /* The destination lies above the source, so work from the last byte down
  to the first; any overlapping source bytes are then read before they are
  overwritten. */
  for (k = n; k > 0; k--) to[k-1] = from[k-1];
  }
else
  {
  /* Destination at or below the source: a plain ascending copy is safe. */
  for (k = 0; k < n; k++) to[k] = from[k];
  }

/* As with memmove(), the value returned is the original destination. */
return d;
#endif   /* not HAVE_BCOPY */
}
2717#undef memmove
2718#define memmove(d,s,n) emulated_memmove(d,s,n)
2719#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
2720
2721
2722
2723#ifndef HAVE_STRERROR
2724/*************************************************
2725* Provide strerror() for non-ANSI libraries *
2726*************************************************/
2727
2728/* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
2729libraries. They may no longer be around, but just in case, we can try to
2730provide the same facility by this simple alternative function. */
2731
extern int sys_nerr;
extern char *sys_errlist[];

/* Look the error number up in the system's error list; numbers outside
0 .. sys_nerr-1 yield a fixed fallback string. */

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
2741#endif /* HAVE_STRERROR */
2742
2743
2744
2745/*************************************************
2746* Local memory functions *
2747*************************************************/
2748
2749/* Alternative memory functions, to test functionality. */
2750
2751static void *my_malloc(PCRE2_SIZE size, void *data)
2752{
2753void *block = malloc(size);
2754(void)data;
2755if (show_memory)
2756 {
2757 if (block == NULL)
2758 {
Elliott Hughes16619d62021-10-29 12:10:38 -07002759 fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", size);
Elliott Hughes5b808042021-10-01 10:56:10 -07002760 }
2761 else
2762 {
Elliott Hughes16619d62021-10-29 12:10:38 -07002763 fprintf(outfile, "malloc %5" SIZ_FORM, size);
Elliott Hughes5b808042021-10-01 10:56:10 -07002764#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2765 fprintf(outfile, " %p", block); /* Not portable */
2766#endif
2767 if (malloclistptr < MALLOCLISTSIZE)
2768 {
2769 malloclist[malloclistptr] = block;
2770 malloclistlength[malloclistptr++] = size;
2771 }
2772 else
2773 fprintf(outfile, " (not remembered)");
2774 fprintf(outfile, "\n");
2775 }
2776 }
2777return block;
2778}
2779
2780static void my_free(void *block, void *data)
2781{
2782(void)data;
2783if (show_memory)
2784 {
2785 uint32_t i, j;
2786 BOOL found = FALSE;
2787
2788 fprintf(outfile, "free");
2789 for (i = 0; i < malloclistptr; i++)
2790 {
2791 if (block == malloclist[i])
2792 {
Elliott Hughes16619d62021-10-29 12:10:38 -07002793 fprintf(outfile, " %5" SIZ_FORM, malloclistlength[i]);
Elliott Hughes5b808042021-10-01 10:56:10 -07002794 malloclistptr--;
2795 for (j = i; j < malloclistptr; j++)
2796 {
2797 malloclist[j] = malloclist[j+1];
2798 malloclistlength[j] = malloclistlength[j+1];
2799 }
2800 found = TRUE;
2801 break;
2802 }
2803 }
2804 if (!found) fprintf(outfile, " unremembered block");
2805#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2806 fprintf(outfile, " %p", block); /* Not portable */
2807#endif
2808 fprintf(outfile, "\n");
2809 }
2810free(block);
2811}
2812
2813
2814
2815/*************************************************
2816* Callback function for stack guard *
2817*************************************************/
2818
2819/* This is set up to be called from pcre2_compile() when the stackguard=n
2820modifier sets a value greater than zero. The test we do is whether the
2821parenthesis nesting depth is greater than the value set by the modifier.
2822
Arguments:
  depth      the current parenthesis nesting depth
  user_data  not used

Returns:     non-zero to kill the compilation
2825*/
2826
2827static int
2828stack_guard(uint32_t depth, void *user_data)
2829{
2830(void)user_data;
2831return depth > pat_patctl.stackguard_test;
2832}
2833
2834
2835/*************************************************
2836* JIT memory callback *
2837*************************************************/
2838
2839static PCRE2_JIT_STACK*
2840jit_callback(void *arg)
2841{
2842jit_was_used = TRUE;
2843return (PCRE2_JIT_STACK *)arg;
2844}
2845
2846
2847/*************************************************
2848* Convert UTF-8 character to code point *
2849*************************************************/
2850
2851/* This function reads one or more bytes that represent a UTF-8 character,
2852and returns the codepoint of that character. Note that the function supports
2853the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2854to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2855codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2856checking, and also for generating 32-bit non-UTF data values above the UTF
2857limit.
2858
2859Argument:
2860 utf8bytes a pointer to the byte vector
  vptr       a pointer to a uint32_t to receive the code point
2862
2863Returns: > 0 => the number of bytes consumed
2864 -6 to 0 => malformed UTF-8 character at offset = (-return)
2865*/
2866
2867static int
2868utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2869{
2870uint32_t c = *utf8bytes++;
2871uint32_t d = c;
2872int i, j, s;
2873
2874for (i = -1; i < 6; i++) /* i is number of additional bytes */
2875 {
2876 if ((d & 0x80) == 0) break;
2877 d <<= 1;
2878 }
2879
2880if (i == -1) { *vptr = c; return 1; } /* ascii character */
2881if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
2882
2883/* i now has a value in the range 1-5 */
2884
2885s = 6*i;
2886d = (c & utf8_table3[i]) << s;
2887
2888for (j = 0; j < i; j++)
2889 {
2890 c = *utf8bytes++;
2891 if ((c & 0xc0) != 0x80) return -(j+1);
2892 s -= 6;
2893 d |= (c & 0x3f) << s;
2894 }
2895
2896/* Check that encoding was the correct unique one */
2897
2898for (j = 0; j < utf8_table1_size; j++)
2899 if (d <= (uint32_t)utf8_table1[j]) break;
2900if (j != i) return -(i+1);
2901
2902/* Valid value */
2903
2904*vptr = d;
2905return i+1;
2906}
2907
2908
2909
2910/*************************************************
2911* Print one character *
2912*************************************************/
2913
2914/* Print a single character either literally, or as a hex escape, and count how
2915many printed characters are used.
2916
2917Arguments:
2918 c the character
2919 utf TRUE in UTF mode
2920 f the FILE to print to, or NULL just to count characters
2921
2922Returns: number of characters written
2923*/
2924
2925static int
2926pchar(uint32_t c, BOOL utf, FILE *f)
2927{
2928int n = 0;
2929char tempbuffer[16];
2930
2931if (PRINTOK(c))
2932 {
2933 if (f != NULL) fprintf(f, "%c", c);
2934 return 1;
2935 }
2936
2937if (c < 0x100)
2938 {
2939 if (utf)
2940 {
2941 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2942 return 6;
2943 }
2944 else
2945 {
2946 if (f != NULL) fprintf(f, "\\x%02x", c);
2947 return 4;
2948 }
2949 }
2950
2951if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2952 else n = sprintf(tempbuffer, "\\x{%02x}", c);
2953
2954return n >= 0 ? n : 0;
2955}
2956
2957
2958
2959#ifdef SUPPORT_PCRE2_16
2960/*************************************************
2961* Find length of 0-terminated 16-bit string *
2962*************************************************/
2963
2964static size_t strlen16(PCRE2_SPTR16 p)
2965{
2966PCRE2_SPTR16 pp = p;
2967while (*pp != 0) pp++;
2968return (int)(pp - p);
2969}
2970#endif /* SUPPORT_PCRE2_16 */
2971
2972
2973
2974#ifdef SUPPORT_PCRE2_32
2975/*************************************************
2976* Find length of 0-terminated 32-bit string *
2977*************************************************/
2978
2979static size_t strlen32(PCRE2_SPTR32 p)
2980{
2981PCRE2_SPTR32 pp = p;
2982while (*pp != 0) pp++;
2983return (int)(pp - p);
2984}
2985#endif /* SUPPORT_PCRE2_32 */
2986
2987
2988#ifdef SUPPORT_PCRE2_8
2989/*************************************************
2990* Print 8-bit character string *
2991*************************************************/
2992
2993/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2994For printing *MARK strings, a negative length is given, indicating that the
2995length is in the first code unit. If handed a NULL file, this function just
2996counts chars without printing (because pchar() does that). */
2997
2998static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
2999{
3000uint32_t c = 0;
3001int yield = 0;
3002if (length < 0) length = *p++;
3003while (length-- > 0)
3004 {
3005 if (utf)
3006 {
3007 int rc = utf82ord(p, &c);
3008 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
3009 {
3010 length -= rc - 1;
3011 p += rc;
3012 yield += pchar(c, utf, f);
3013 continue;
3014 }
3015 }
3016 c = *p++;
3017 yield += pchar(c, utf, f);
3018 }
3019
3020return yield;
3021}
3022#endif
3023
3024
3025#ifdef SUPPORT_PCRE2_16
3026/*************************************************
3027* Print 16-bit character string *
3028*************************************************/
3029
3030/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
3031For printing *MARK strings, a negative length is given, indicating that the
3032length is in the first code unit. If handed a NULL file, just counts chars
3033without printing. */
3034
3035static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
3036{
3037int yield = 0;
3038if (length < 0) length = *p++;
3039while (length-- > 0)
3040 {
3041 uint32_t c = *p++ & 0xffff;
3042 if (utf && c >= 0xD800 && c < 0xDC00 && length > 0)
3043 {
3044 int d = *p & 0xffff;
3045 if (d >= 0xDC00 && d <= 0xDFFF)
3046 {
3047 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
3048 length--;
3049 p++;
3050 }
3051 }
3052 yield += pchar(c, utf, f);
3053 }
3054return yield;
3055}
3056#endif /* SUPPORT_PCRE2_16 */
3057
3058
3059
3060#ifdef SUPPORT_PCRE2_32
3061/*************************************************
3062* Print 32-bit character string *
3063*************************************************/
3064
3065/* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
3066For printing *MARK strings, a negative length is given, indicating that the
3067length is in the first code unit. If handed a NULL file, just counts chars
3068without printing. */
3069
3070static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
3071{
3072int yield = 0;
3073(void)(utf); /* Avoid compiler warning */
3074if (length < 0) length = *p++;
3075while (length-- > 0)
3076 {
3077 uint32_t c = *p++;
3078 yield += pchar(c, utf, f);
3079 }
3080return yield;
3081}
3082#endif /* SUPPORT_PCRE2_32 */
3083
3084
3085
3086
3087/*************************************************
3088* Convert character value to UTF-8 *
3089*************************************************/
3090
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. It is needed even when the
8-bit library is not supported, to generate UTF-8 output for non-ASCII
characters.

Arguments:
  cvalue     the character value
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             OR -1 if cvalue is greater than 0x7fffffff
*/
3102
3103static int
3104ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3105{
3106int i, j;
3107if (cvalue > 0x7fffffffu)
3108 return -1;
3109for (i = 0; i < utf8_table1_size; i++)
3110 if (cvalue <= (uint32_t)utf8_table1[i]) break;
3111utf8bytes += i;
3112for (j = i; j > 0; j--)
3113 {
3114 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3115 cvalue >>= 6;
3116 }
3117*utf8bytes = utf8_table2[i] | cvalue;
3118return i + 1;
3119}
3120
3121
3122
3123#ifdef SUPPORT_PCRE2_16
3124/*************************************************
3125* Convert string to 16-bit *
3126*************************************************/
3127
3128/* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3129the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3130code values from 0 to 0x7fffffff. However, values greater than the later UTF
3131limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3132UTF-8 if the utf8_input modifier is set, but an error is generated for values
3133greater than 0xffff.
3134
If all the input bytes are ASCII, the space needed for a 16-bit string is
exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
is no more than double, because a value up to 0xffff uses between 1 and 3 bytes
in UTF-8 but only 2 bytes (one code unit) in UTF-16, while a higher value uses
4 bytes in UTF-8 and 4 bytes (a surrogate pair) in UTF-16. The result is always
left in pbuffer16. Impose a minimum size to save repeated re-sizing.
3141
3142Note that this function does not object to surrogate values. This is
3143deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3144for the purpose of testing that they are correctly faulted.
3145
3146Arguments:
3147 p points to a byte string
3148 utf true in UTF mode
3149 lenptr points to number of bytes in the string (excluding trailing zero)
3150
3151Returns: 0 on success, with the length updated to the number of 16-bit
3152 data items used (excluding the trailing zero)
3153 OR -1 if a UTF-8 string is malformed
3154 OR -2 if a value > 0x10ffff is encountered in UTF mode
3155 OR -3 if a value > 0xffff is encountered when not in UTF mode
3156*/
3157
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07003158static int
Elliott Hughes5b808042021-10-01 10:56:10 -07003159to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3160{
3161uint16_t *pp;
3162PCRE2_SIZE len = *lenptr;
3163
3164if (pbuffer16_size < 2*len + 2)
3165 {
3166 if (pbuffer16 != NULL) free(pbuffer16);
3167 pbuffer16_size = 2*len + 2;
3168 if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3169 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3170 if (pbuffer16 == NULL)
3171 {
3172 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
Elliott Hughes16619d62021-10-29 12:10:38 -07003173 pbuffer16_size);
Elliott Hughes5b808042021-10-01 10:56:10 -07003174 exit(1);
3175 }
3176 }
3177
3178pp = pbuffer16;
3179if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3180 {
3181 for (; len > 0; len--) *pp++ = *p++;
3182 }
3183else while (len > 0)
3184 {
3185 uint32_t c;
3186 int chlen = utf82ord(p, &c);
3187 if (chlen <= 0) return -1;
3188 if (!utf && c > 0xffff) return -3;
3189 if (c > 0x10ffff) return -2;
3190 p += chlen;
3191 len -= chlen;
3192 if (c < 0x10000) *pp++ = c; else
3193 {
3194 c -= 0x10000;
3195 *pp++ = 0xD800 | (c >> 10);
3196 *pp++ = 0xDC00 | (c & 0x3ff);
3197 }
3198 }
3199
3200*pp = 0;
3201*lenptr = pp - pbuffer16;
3202return 0;
3203}
3204#endif
3205
3206
3207
#ifdef SUPPORT_PCRE2_32
/*************************************************
*           Convert string to 32-bit             *
*************************************************/

/* Converts a byte string into 32-bit code units, leaving the result in the
global pbuffer32 (which is grown on demand, never to less than 8192 bytes).

When utf is false and the pattern's utf8_input control is not set, each input
byte is simply widened to one 32-bit unit. Otherwise the input is decoded with
utf82ord(), which accepts the original RFC 2279 form of UTF-8 (up to 6 bytes,
values up to 0x7fffffff). In UTF mode, decoded values above 0x10ffff cause an
error; with utf8_input alone no limit is imposed.

In the utf8_input (non-UTF) case a 0xff byte, which is illegal in UTF-8, is
given a special meaning when it is not the last byte: it is consumed and causes
the top bit of the following character to be set. This provides a way of
generating 32-bit values greater than 0x7fffffff.

A buffer of 4*len + 4 bytes is always enough, because every output unit
consumes at least one input byte and one extra unit holds the terminating zero.

Surrogate values in the input are deliberately not diagnosed, so that invalid
UTF-32 strings can be constructed to test that they are correctly faulted.

Arguments:
  p          points to a byte string
  utf        true in UTF mode
  lenptr     points to number of bytes in the string (excluding trailing zero)

Returns:     0 on success, with the length updated to the number of 32-bit
               data items used (excluding the trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered in UTF mode
*/

static int
to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
{
PCRE2_SIZE remaining = *lenptr;
uint32_t *out;

/* Replace the output buffer if it is too small; old contents are not kept. */

if (4*remaining + 4 > pbuffer32_size)
  {
  free(pbuffer32);
  pbuffer32_size = 4*remaining + 4;
  if (pbuffer32_size < 8192) pbuffer32_size = 8192;
  pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
  if (pbuffer32 == NULL)
    {
    fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
      pbuffer32_size);
    exit(1);
    }
  }

out = pbuffer32;

if (utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0)
  {
  /* Input is UTF-8: decode one character at a time. */

  while (remaining > 0)
    {
    uint32_t value;
    uint32_t high = 0;
    int used;

    /* A leading 0xff (non-UTF only, and not the final byte) sets the top bit
    of the character that follows it. */

    if (!utf && remaining > 1 && *p == 0xff)
      {
      high = 0x80000000u;
      p++;
      remaining--;
      }

    used = utf82ord(p, &value);
    if (used <= 0) return -1;
    if (utf && value > 0x10ffff) return -2;

    p += used;
    remaining -= used;
    *out++ = value | high;
    }
  }
else
  {
  /* Plain bytes: widen each one to a 32-bit unit. */

  while (remaining-- > 0) *out++ = *p++;
  }

*out = 0;
*lenptr = out - pbuffer32;
return 0;
}
#endif /* SUPPORT_PCRE2_32 */
3296
3297
3298
/* This function is no longer used. Keep it around for a while, just in case it
needs to be re-instated. */

#ifdef NEVERNEVERNEVER

/*************************************************
*         Move back by so many characters        *
*************************************************/

/* Given a code unit offset in a subject string, step backwards over a number
of characters and return the offset that is reached. Outside UTF mode, and in
32-bit mode, a character is one code unit, so this is plain subtraction clamped
at zero. In 8-bit UTF mode each step also skips back over bytes of the form
10xxxxxx; in 16-bit UTF mode each step skips one extra unit when it lands on a
value in the range 0xdc00-0xdfff.

Arguments:
  subject   pointer to the string
  offset    start offset
  count     count to move back by
  utf       TRUE if in UTF mode

Returns:    a possibly changed offset
*/

static PCRE2_SIZE
backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
{
if (utf && test_mode == PCRE8_MODE)
  {
  PCRE2_SPTR8 base = (PCRE2_SPTR8)subject;
  PCRE2_SPTR8 cur = base + offset;

  while (count > 0 && cur > base)
    {
    do cur--; while ((*cur & 0xc0) == 0x80);
    count--;
    }
  return cur - base;
  }

if (utf && test_mode != PCRE32_MODE)   /* 16-bit mode */
  {
  PCRE2_SPTR16 base = (PCRE2_SPTR16)subject;
  PCRE2_SPTR16 cur = base + offset;

  while (count > 0 && cur > base)
    {
    cur--;
    if ((*cur & 0xfc00) == 0xdc00) cur--;
    count--;
    }
  return cur - base;
  }

/* One code unit per character. */

return (count >= offset)? 0 : (offset - count);
}
#endif /* NEVERNEVERNEVER */
3349
3350
3351
3352/*************************************************
3353* Expand input buffers *
3354*************************************************/
3355
3356/* This function doubles the size of the input buffer and the buffer for
3357keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3358the new ones.
3359
3360Arguments: none
3361Returns: nothing (aborts if malloc() fails)
3362*/
3363
3364static void
3365expand_input_buffers(void)
3366{
3367int new_pbuffer8_size = 2*pbuffer8_size;
3368uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3369uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3370
3371if (new_buffer == NULL || new_pbuffer8 == NULL)
3372 {
3373 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3374 exit(1);
3375 }
3376
3377memcpy(new_buffer, buffer, pbuffer8_size);
3378memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3379
3380pbuffer8_size = new_pbuffer8_size;
3381
3382free(buffer);
3383free(pbuffer8);
3384
3385buffer = new_buffer;
3386pbuffer8 = new_pbuffer8;
3387}
3388
3389
3390
3391/*************************************************
3392* Read or extend an input line *
3393*************************************************/
3394
3395/* Input lines are read into buffer, but both patterns and data lines can be
3396continued over multiple input lines. In addition, if the buffer fills up, we
3397want to automatically expand it so as to be able to handle extremely large
3398lines that are needed for certain stress tests, although this is less likely
3399now that there are repetition features for both patterns and data. When the
3400input buffer is expanded, the other two buffers must also be expanded likewise,
3401and the contents of pbuffer, which are a copy of the input for callouts, must
3402be preserved (for when expansion happens for a data line). This is not the most
3403optimal way of handling this, but hey, this is just a test program!
3404
3405Arguments:
3406 f the file to read
3407 start where in buffer to start (this *must* be within buffer)
3408 prompt for stdin or readline()
3409
3410Returns: pointer to the start of new data
3411 could be a copy of start, or could be moved
3412 NULL if no data read and EOF reached
3413*/
3414
3415static uint8_t *
3416extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3417{
3418uint8_t *here = start;
3419
3420for (;;)
3421 {
3422 size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3423
3424 if (rlen > 1000)
3425 {
3426 size_t dlen;
3427
3428 /* If libreadline or libedit support is required, use readline() to read a
3429 line if the input is a terminal. Note that readline() removes the trailing
3430 newline, so we must put it back again, to be compatible with fgets(). */
3431
3432#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3433 if (INTERACTIVE(f))
3434 {
3435 size_t len;
3436 char *s = readline(prompt);
3437 if (s == NULL) return (here == start)? NULL : start;
3438 len = strlen(s);
3439 if (len > 0) add_history(s);
3440 if (len > rlen - 1) len = rlen - 1;
3441 memcpy(here, s, len);
3442 here[len] = '\n';
3443 here[len+1] = 0;
3444 free(s);
3445 }
3446 else
3447#endif
3448
3449 /* Read the next line by normal means, prompting if the file is a tty. */
3450
3451 {
3452 if (INTERACTIVE(f)) printf("%s", prompt);
3453 if (fgets((char *)here, rlen, f) == NULL)
3454 return (here == start)? NULL : start;
3455 }
3456
3457 dlen = strlen((char *)here);
3458 here += dlen;
3459
3460 /* Check for end of line reached. Take care not to read data from before
3461 start (dlen will be zero for a file starting with a binary zero). */
3462
3463 if (here > start && here[-1] == '\n') return start;
3464
3465 /* If we have not read a newline when reading a file, we have either filled
3466 the buffer or reached the end of the file. We can detect the former by
3467 checking that the string fills the buffer, and the latter by feof(). If
3468 neither of these is true, it means we read a binary zero which has caused
3469 strlen() to give a short length. This is a hard error because pcre2test
3470 expects to work with C strings. */
3471
3472 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3473 {
3474 fprintf(outfile, "** Binary zero encountered in input\n");
3475 fprintf(outfile, "** pcre2test run abandoned\n");
3476 exit(1);
3477 }
3478 }
3479
3480 else
3481 {
3482 size_t start_offset = start - buffer;
3483 size_t here_offset = here - buffer;
3484 expand_input_buffers();
3485 start = buffer + start_offset;
3486 here = buffer + here_offset;
3487 }
3488 }
3489
3490/* Control never gets here */
3491}
3492
3493
3494
3495/*************************************************
3496* Case-independent strncmp() function *
3497*************************************************/
3498
3499/*
3500Arguments:
3501 s first string
3502 t second string
3503 n number of characters to compare
3504
3505Returns: < 0, = 0, or > 0, according to the comparison
3506*/
3507
3508static int
3509strncmpic(const uint8_t *s, const uint8_t *t, int n)
3510{
3511while (n--)
3512 {
3513 int c = tolower(*s++) - tolower(*t++);
3514 if (c != 0) return c;
3515 }
3516return 0;
3517}
3518
3519
3520
3521/*************************************************
3522* Scan the main modifier list *
3523*************************************************/
3524
3525/* This function searches the modifier list for a long modifier name.
3526
3527Argument:
3528 p start of the name
3529 lenp length of the name
3530
3531Returns: an index in the modifier list, or -1 on failure
3532*/
3533
3534static int
3535scan_modifiers(const uint8_t *p, unsigned int len)
3536{
3537int bot = 0;
3538int top = MODLISTCOUNT;
3539
3540while (top > bot)
3541 {
3542 int mid = (bot + top)/2;
3543 unsigned int mlen = strlen(modlist[mid].name);
3544 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3545 if (c == 0)
3546 {
3547 if (len == mlen) return mid;
3548 c = (int)len - (int)mlen;
3549 }
3550 if (c > 0) bot = mid + 1; else top = mid;
3551 }
3552
3553return -1;
3554
3555}
3556
3557
3558
3559/*************************************************
3560* Check a modifer and find its field *
3561*************************************************/
3562
3563/* This function is called when a modifier has been identified. We check that
3564it is allowed here and find the field that is to be changed.
3565
3566Arguments:
3567 m the modifier list entry
3568 ctx CTX_PAT => pattern context
3569 CTX_POPPAT => pattern context for popped pattern
3570 CTX_DEFPAT => default pattern context
3571 CTX_DAT => data context
3572 CTX_DEFDAT => default data context
3573 pctl point to pattern control block
3574 dctl point to data control block
3575 c a single character or 0
3576
3577Returns: a field pointer or NULL
3578*/
3579
3580static void *
3581check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3582{
3583void *field = NULL;
3584PCRE2_SIZE offset = m->offset;
3585
3586if (restrict_for_perl_test) switch(m->which)
3587 {
3588 case MOD_PNDP:
3589 case MOD_PATP:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07003590 case MOD_DATP:
Elliott Hughes5b808042021-10-01 10:56:10 -07003591 case MOD_PDP:
3592 break;
3593
3594 default:
3595 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3596 m->name);
3597 return NULL;
3598 }
3599
3600switch (m->which)
3601 {
3602 case MOD_CTC: /* Compile context modifier */
3603 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3604 else if (ctx == CTX_PAT) field = PTR(pat_context);
3605 break;
3606
3607 case MOD_CTM: /* Match context modifier */
3608 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3609 else if (ctx == CTX_DAT) field = PTR(dat_context);
3610 break;
3611
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07003612 case MOD_DAT: /* Data line modifier */
3613 case MOD_DATP: /* Allowed for Perl test */
Elliott Hughes5b808042021-10-01 10:56:10 -07003614 if (dctl != NULL) field = dctl;
3615 break;
3616
3617 case MOD_PAT: /* Pattern modifier */
3618 case MOD_PATP: /* Allowed for Perl test */
3619 if (pctl != NULL) field = pctl;
3620 break;
3621
3622 case MOD_PD: /* Pattern or data line modifier */
3623 case MOD_PDP: /* Ditto, allowed for Perl test */
3624 case MOD_PND: /* Ditto, but not default pattern */
3625 case MOD_PNDP: /* Ditto, allowed for Perl test */
3626 if (dctl != NULL) field = dctl;
3627 else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3628 ctx != CTX_DEFPAT))
3629 field = pctl;
3630 break;
3631 }
3632
3633if (field == NULL)
3634 {
3635 if (c == 0)
3636 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3637 else
3638 fprintf(outfile, "** /%c is not valid here\n", c);
3639 return NULL;
3640 }
3641
3642return (char *)field + offset;
3643}
3644
3645
3646
3647/*************************************************
3648* Decode a modifier list *
3649*************************************************/
3650
3651/* A pointer to a control block is NULL when called in cases when that block is
3652not relevant. They are never all relevant in one call. At least one of patctl
3653and datctl is NULL. The second argument specifies which context to use for
3654modifiers that apply to contexts.
3655
3656Arguments:
3657 p point to modifier string
3658 ctx CTX_PAT => pattern context
3659 CTX_POPPAT => pattern context for popped pattern
3660 CTX_DEFPAT => default pattern context
3661 CTX_DAT => data context
3662 CTX_DEFDAT => default data context
3663 pctl point to pattern control block
3664 dctl point to data control block
3665
3666Returns: TRUE if successful decode, FALSE otherwise
3667*/
3668
3669static BOOL
3670decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3671{
3672uint8_t *ep, *pp;
3673long li;
3674unsigned long uli;
3675BOOL first = TRUE;
3676
3677for (;;)
3678 {
3679 void *field;
3680 modstruct *m;
3681 BOOL off = FALSE;
3682 unsigned int i, len;
3683 int index;
3684 char *endptr;
3685
3686 /* Skip white space and commas. */
3687
3688 while (isspace(*p) || *p == ',') p++;
3689 if (*p == 0) break;
3690
3691 /* Find the end of the item; lose trailing whitespace at end of line. */
3692
3693 for (ep = p; *ep != 0 && *ep != ','; ep++);
3694 if (*ep == 0)
3695 {
3696 while (ep > p && isspace(ep[-1])) ep--;
3697 *ep = 0;
3698 }
3699
3700 /* Remember if the first character is '-'. */
3701
3702 if (*p == '-')
3703 {
3704 off = TRUE;
3705 p++;
3706 }
3707
3708 /* Find the length of a full-length modifier name, and scan for it. */
3709
3710 pp = p;
3711 while (pp < ep && *pp != '=') pp++;
3712 index = scan_modifiers(p, pp - p);
3713
3714 /* If the first modifier is unrecognized, try to interpret it as a sequence
3715 of single-character abbreviated modifiers. None of these modifiers have any
3716 associated data. They just set options or control bits. */
3717
3718 if (index < 0)
3719 {
3720 uint32_t cc;
3721 uint8_t *mp = p;
3722
3723 if (!first)
3724 {
3725 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3726 if (ep - p == 1)
3727 fprintf(outfile, "** Single-character modifiers must come first\n");
3728 return FALSE;
3729 }
3730
3731 for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3732 {
3733 for (i = 0; i < C1MODLISTCOUNT; i++)
3734 if (cc == c1modlist[i].onechar) break;
3735
3736 if (i >= C1MODLISTCOUNT)
3737 {
3738 fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3739 *p, (int)(ep-mp), mp);
3740 return FALSE;
3741 }
3742
3743 if (c1modlist[i].index >= 0)
3744 {
3745 index = c1modlist[i].index;
3746 }
3747
3748 else
3749 {
3750 index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3751 strlen(c1modlist[i].fullname));
3752 if (index < 0)
3753 {
3754 fprintf(outfile, "** Internal error: single-character equivalent "
3755 "modifier '%s' not found\n", c1modlist[i].fullname);
3756 return FALSE;
3757 }
3758 c1modlist[i].index = index; /* Cache for next time */
3759 }
3760
3761 field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3762 if (field == NULL) return FALSE;
3763
3764 /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
3765 PCRE2_EXTENDED_MORE. */
3766
3767 if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
3768 {
3769 *((uint32_t *)field) &= ~PCRE2_EXTENDED;
3770 *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
3771 }
3772 else
3773 *((uint32_t *)field) |= modlist[index].value;
3774 }
3775
3776 continue; /* With tne next (fullname) modifier */
3777 }
3778
3779 /* We have a match on a full-name modifier. Check for the existence of data
3780 when needed. */
3781
3782 m = modlist + index; /* Save typing */
3783 if (m->type != MOD_CTL && m->type != MOD_OPT &&
3784 (m->type != MOD_IND || *pp == '='))
3785 {
3786 if (*pp++ != '=')
3787 {
3788 fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3789 return FALSE;
3790 }
3791 if (off)
3792 {
3793 fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3794 return FALSE;
3795 }
3796 }
3797
3798 /* These on/off types have no data. */
3799
3800 else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3801 {
3802 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3803 return FALSE;
3804 }
3805
3806 /* Set the data length for those types that have data. Then find the field
3807 that is to be set. If check_modifier() returns NULL, it has already output an
3808 error message. */
3809
3810 len = ep - pp;
3811 field = check_modifier(m, ctx, pctl, dctl, 0);
3812 if (field == NULL) return FALSE;
3813
3814 /* Process according to data type. */
3815
3816 switch (m->type)
3817 {
3818 case MOD_CTL:
3819 case MOD_OPT:
3820 if (off) *((uint32_t *)field) &= ~m->value;
3821 else *((uint32_t *)field) |= m->value;
3822 break;
3823
3824 case MOD_BSR:
3825 if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3826 {
3827#ifdef BSR_ANYCRLF
3828 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3829#else
3830 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3831#endif
3832 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
3833 else dctl->control2 &= ~CTL2_BSR_SET;
3834 }
3835 else
3836 {
3837 if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3838 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3839 else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3840 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3841 else goto INVALID_VALUE;
3842 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
3843 else dctl->control2 |= CTL2_BSR_SET;
3844 }
3845 pp = ep;
3846 break;
3847
3848 case MOD_CHR: /* A single character */
3849 *((uint32_t *)field) = *pp++;
3850 break;
3851
3852 case MOD_CON: /* A convert type/options list */
3853 for (;; pp++)
3854 {
3855 uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
3856 len = ((colon != NULL && colon < ep)? colon:ep) - pp;
3857 for (i = 0; i < convertlistcount; i++)
3858 {
3859 if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
3860 {
3861 if (*((uint32_t *)field) == CONVERT_UNSET)
3862 *((uint32_t *)field) = convertlist[i].option;
3863 else
3864 *((uint32_t *)field) |= convertlist[i].option;
3865 break;
3866 }
3867 }
3868 if (i >= convertlistcount) goto INVALID_VALUE;
3869 pp += len;
3870 if (*pp != ':') break;
3871 }
3872 break;
3873
3874 case MOD_IN2: /* One or two unsigned integers */
3875 if (!isdigit(*pp)) goto INVALID_VALUE;
3876 uli = strtoul((const char *)pp, &endptr, 10);
3877 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3878 ((uint32_t *)field)[0] = (uint32_t)uli;
3879 if (*endptr == ':')
3880 {
3881 uli = strtoul((const char *)endptr+1, &endptr, 10);
3882 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3883 ((uint32_t *)field)[1] = (uint32_t)uli;
3884 }
3885 else ((uint32_t *)field)[1] = 0;
3886 pp = (uint8_t *)endptr;
3887 break;
3888
3889 /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3890 less than ULONG_MAX. So first test for overflowing the long int, and then
3891 test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3892
3893 case MOD_SIZ: /* PCRE2_SIZE value */
3894 if (!isdigit(*pp)) goto INVALID_VALUE;
3895 uli = strtoul((const char *)pp, &endptr, 10);
3896 if (uli == ULONG_MAX) goto INVALID_VALUE;
3897#if ULONG_MAX > PCRE2_SIZE_MAX
3898 if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3899#endif
3900 *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3901 pp = (uint8_t *)endptr;
3902 break;
3903
3904 case MOD_IND: /* Unsigned integer with default */
3905 if (len == 0)
3906 {
3907 *((uint32_t *)field) = (uint32_t)(m->value);
3908 break;
3909 }
3910 /* Fall through */
3911
3912 case MOD_INT: /* Unsigned integer */
3913 if (!isdigit(*pp)) goto INVALID_VALUE;
3914 uli = strtoul((const char *)pp, &endptr, 10);
3915 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3916 *((uint32_t *)field) = (uint32_t)uli;
3917 pp = (uint8_t *)endptr;
3918 break;
3919
3920 case MOD_INS: /* Signed integer */
3921 if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3922 li = strtol((const char *)pp, &endptr, 10);
3923 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3924 *((int32_t *)field) = (int32_t)li;
3925 pp = (uint8_t *)endptr;
3926 break;
3927
3928 case MOD_NL:
3929 for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3930 if (len == strlen(newlines[i]) &&
3931 strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3932 if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3933 if (i == 0)
3934 {
3935 *((uint16_t *)field) = NEWLINE_DEFAULT;
3936 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
3937 else dctl->control2 &= ~CTL2_NL_SET;
3938 }
3939 else
3940 {
3941 *((uint16_t *)field) = i;
3942 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
3943 else dctl->control2 |= CTL2_NL_SET;
3944 }
3945 pp = ep;
3946 break;
3947
3948 case MOD_NN: /* Name or (signed) number; may be several */
3949 if (isdigit(*pp) || *pp == '-')
3950 {
3951 int ct = MAXCPYGET - 1;
3952 int32_t value;
3953 li = strtol((const char *)pp, &endptr, 10);
3954 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3955 value = (int32_t)li;
3956 field = (char *)field - m->offset + m->value; /* Adjust field ptr */
3957 if (value >= 0) /* Add new number */
3958 {
3959 while (*((int32_t *)field) >= 0 && ct-- > 0) /* Skip previous */
3960 field = (char *)field + sizeof(int32_t);
3961 if (ct <= 0)
3962 {
3963 fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3964 return FALSE;
3965 }
3966 }
3967 *((int32_t *)field) = value;
3968 if (ct > 0) ((int32_t *)field)[1] = -1;
3969 pp = (uint8_t *)endptr;
3970 }
3971
3972 /* Multiple strings are put end to end. */
3973
3974 else
3975 {
3976 char *nn = (char *)field;
3977 if (len > 0) /* Add new name */
3978 {
3979 if (len > MAX_NAME_SIZE)
3980 {
3981 fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
3982 return FALSE;
3983 }
3984 while (*nn != 0) nn += strlen(nn) + 1;
3985 if (nn + len + 2 - (char *)field > LENCPYGET)
3986 {
3987 fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
3988 m->name);
3989 return FALSE;
3990 }
3991 memcpy(nn, pp, len);
3992 }
3993 nn[len] = 0 ;
3994 nn[len+1] = 0;
3995 pp = ep;
3996 }
3997 break;
3998
3999 case MOD_STR:
4000 if (len + 1 > m->value)
4001 {
4002 fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
4003 m->name, m->value - 1);
4004 return FALSE;
4005 }
4006 memcpy(field, pp, len);
4007 ((uint8_t *)field)[len] = 0;
4008 pp = ep;
4009 break;
4010 }
4011
4012 if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
4013 {
4014 fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
4015 return FALSE;
4016 }
4017
4018 p = pp;
4019 first = FALSE;
4020
4021 if (ctx == CTX_POPPAT &&
4022 (pctl->options != 0 ||
4023 pctl->tables_id != 0 ||
4024 pctl->locale[0] != 0 ||
4025 (pctl->control & NOTPOP_CONTROLS) != 0))
4026 {
4027 fprintf(outfile, "** '%s' is not valid here\n", m->name);
4028 return FALSE;
4029 }
4030 }
4031
4032return TRUE;
4033
4034INVALID_VALUE:
4035fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
4036return FALSE;
4037}
4038
4039
4040/*************************************************
4041* Get info from a pattern *
4042*************************************************/
4043
4044/* A wrapped call to pcre2_pattern_info(), applied to the current compiled
4045pattern.
4046
4047Arguments:
4048 what code for the required information
4049 where where to put the answer
4050 unsetok PCRE2_ERROR_UNSET is an "expected" result
4051
4052Returns: the return from pcre2_pattern_info()
4053*/
4054
4055static int
4056pattern_info(int what, void *where, BOOL unsetok)
4057{
4058int rc;
4059PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL); /* Exercise the code */
4060PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
4061if (rc >= 0) return 0;
4062if (rc != PCRE2_ERROR_UNSET || !unsetok)
4063 {
4064 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
4065 what);
4066 if (rc == PCRE2_ERROR_BADMODE)
4067 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
4068 "%d-bit mode\n", test_mode,
4069 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
4070 }
4071return rc;
4072}
4073
4074
4075
#ifdef SUPPORT_PCRE2_8
/*************************************************
*             Show something in a list           *
*************************************************/

/* This function just helps to keep the code that uses it tidier. It's used for
various lists of things where there needs to be introductory text before the
first item: the text that *msg points to is output, followed by a space and the
item, and *msg is then changed to an empty string so that later calls with the
same variable output only the space and the item. As these calls are all in the
POSIX-support code, they happen only when 8-bit mode is supported. */

static void
prmsg(const char **msg, const char *s)
{
const char *intro = *msg;
*msg = "";
fprintf(outfile, "%s %s", intro, s);
}
#endif  /* SUPPORT_PCRE2_8 */
4093
4094
4095
4096/*************************************************
4097* Show control bits *
4098*************************************************/
4099
4100/* Called for mutually exclusive controls and for unsupported POSIX controls.
4101Because the bits are unique, this can be used for both pattern and data control
4102words.
4103
4104Arguments:
4105 controls control bits
4106 controls2 more control bits
4107 before text to print before
4108
4109Returns: nothing
4110*/
4111
4112static void
4113show_controls(uint32_t controls, uint32_t controls2, const char *before)
4114{
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004115fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
Elliott Hughes5b808042021-10-01 10:56:10 -07004116 before,
4117 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4118 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4119 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4120 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4121 ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
4122 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4123 ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4124 ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4125 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4126 ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4127 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4128 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4129 ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4130 ((controls & CTL_DFA) != 0)? " dfa" : "",
4131 ((controls & CTL_EXPAND) != 0)? " expand" : "",
4132 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4133 ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
4134 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4135 ((controls & CTL_GETALL) != 0)? " getall" : "",
4136 ((controls & CTL_GLOBAL) != 0)? " global" : "",
4137 ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4138 ((controls & CTL_INFO) != 0)? " info" : "",
4139 ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4140 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4141 ((controls & CTL_MARK) != 0)? " mark" : "",
4142 ((controls & CTL_MEMORY) != 0)? " memory" : "",
4143 ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4144 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004145 ((controls2 & CTL2_NULL_REPLACEMENT) != 0)? " null_replacement" : "",
4146 ((controls2 & CTL2_NULL_SUBJECT) != 0)? " null_subject" : "",
Elliott Hughes5b808042021-10-01 10:56:10 -07004147 ((controls & CTL_POSIX) != 0)? " posix" : "",
4148 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4149 ((controls & CTL_PUSH) != 0)? " push" : "",
4150 ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4151 ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4152 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4153 ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
4154 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4155 ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
4156 ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "",
4157 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4158 ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "",
4159 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4160 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4161 ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4162 ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4163 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4164}
4165
4166
4167
4168/*************************************************
4169* Show compile options *
4170*************************************************/
4171
4172/* Called from show_pattern_info() and for unsupported POSIX options.
4173
4174Arguments:
4175 options an options word
4176 before text to print before
4177 after text to print after
4178
4179Returns: nothing
4180*/
4181
4182static void
4183show_compile_options(uint32_t options, const char *before, const char *after)
4184{
4185if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4186else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4187 before,
4188 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4189 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4190 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4191 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4192 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4193 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4194 ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4195 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4196 ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4197 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4198 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4199 ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4200 ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4201 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4202 ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4203 ((options & PCRE2_MATCH_INVALID_UTF) != 0)? " match_invalid_utf" : "",
4204 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4205 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4206 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4207 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4208 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4209 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4210 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4211 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4212 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4213 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4214 ((options & PCRE2_UCP) != 0)? " ucp" : "",
4215 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4216 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4217 ((options & PCRE2_UTF) != 0)? " utf" : "",
4218 after);
4219}
4220
4221
4222/*************************************************
4223* Show compile extra options *
4224*************************************************/
4225
4226/* Called from show_pattern_info() and for unsupported POSIX options.
4227
4228Arguments:
4229 options an options word
4230 before text to print before
4231 after text to print after
4232
4233Returns: nothing
4234*/
4235
4236static void
4237show_compile_extra_options(uint32_t options, const char *before,
4238 const char *after)
4239{
4240if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4241else fprintf(outfile, "%s%s%s%s%s%s%s%s",
4242 before,
4243 ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4244 ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4245 ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
4246 ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4247 ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4248 ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
4249 after);
4250}
4251
4252
4253
4254#ifdef SUPPORT_PCRE2_8
4255/*************************************************
4256* Show match options *
4257*************************************************/
4258
4259/* Called for unsupported POSIX options. */
4260
4261static void
4262show_match_options(uint32_t options)
4263{
4264fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
4265 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4266 ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)? " copy_matched_subject" : "",
4267 ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
4268 ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
4269 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4270 ((options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
4271 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4272 ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
4273 ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
4274 ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
4275 ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
4276 ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
4277 ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
4278}
4279#endif /* SUPPORT_PCRE2_8 */
4280
4281
4282
4283/*************************************************
4284* Show memory usage info for a pattern *
4285*************************************************/
4286
4287static void
4288show_memory_info(void)
4289{
4290uint32_t name_count, name_entry_size;
4291size_t size, cblock_size;
4292
4293/* One of the test_mode values will always be true, but to stop a compiler
4294warning we must initialize cblock_size. */
4295
4296cblock_size = 0;
4297#ifdef SUPPORT_PCRE2_8
4298if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4299#endif
4300#ifdef SUPPORT_PCRE2_16
4301if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4302#endif
4303#ifdef SUPPORT_PCRE2_32
4304if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4305#endif
4306
4307(void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4308(void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4309(void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4310fprintf(outfile, "Memory allocation (code space): %d\n",
4311 (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
4312if (pat_patctl.jit != 0)
4313 {
4314 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4315 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
4316 }
4317}
4318
4319
4320
4321/*************************************************
4322* Show frame size info for a pattern *
4323*************************************************/
4324
4325static void
4326show_framesize(void)
4327{
4328size_t frame_size;
4329(void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4330fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
4331}
4332
4333
4334
4335/*************************************************
4336* Get and output an error message *
4337*************************************************/
4338
4339static BOOL
4340print_error_message(int errorcode, const char *before, const char *after)
4341{
4342int len;
4343PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4344if (len < 0)
4345 {
4346 fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4347 "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4348 }
4349else
4350 {
4351 fprintf(outfile, "%s", before);
4352 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4353 fprintf(outfile, "%s", after);
4354 }
4355return len >= 0;
4356}
4357
4358
4359/*************************************************
4360* Callback function for callout enumeration *
4361*************************************************/
4362
/* The only differences in the callout enumeration block for different code
4364unit widths are that the pointers to the subject, the most recent MARK, and a
4365callout argument string point to strings of the appropriate width. Casts can be
4366used to deal with this.
4367
4368Argument:
4369 cb pointer to enumerate block
4370 callout_data user data
4371
4372Returns: 0
4373*/
4374
4375static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
4376 void *callout_data)
4377{
4378uint32_t i;
4379BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4380
4381(void)callout_data; /* Not currently displayed */
4382
4383fprintf(outfile, "Callout ");
4384if (cb->callout_string != NULL)
4385 {
4386 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
4387 fprintf(outfile, "%c", delimiter);
4388 PCHARSV(cb->callout_string, 0,
4389 cb->callout_string_length, utf, outfile);
4390 for (i = 0; callout_start_delims[i] != 0; i++)
4391 if (delimiter == callout_start_delims[i])
4392 {
4393 delimiter = callout_end_delims[i];
4394 break;
4395 }
4396 fprintf(outfile, "%c ", delimiter);
4397 }
4398else fprintf(outfile, "%d ", cb->callout_number);
4399
4400fprintf(outfile, "%.*s\n",
4401 (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
4402 pbuffer8 + cb->pattern_position);
4403
4404return 0;
4405}
4406
4407
4408
4409/*************************************************
4410* Show information about a pattern *
4411*************************************************/
4412
4413/* This function is called after a pattern has been compiled if any of the
4414information-requesting controls have been set.
4415
4416Arguments: none
4417
4418Returns: PR_OK continue processing next line
4419 PR_SKIP skip to a blank line
4420 PR_ABEND abort the pcre2test run
4421*/
4422
4423static int
4424show_pattern_info(void)
4425{
4426uint32_t compile_options, overall_options, extra_options;
4427BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4428
4429if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
4430 {
4431 fprintf(outfile, "------------------------------------------------------------------\n");
4432 PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
4433 }
4434
4435if ((pat_patctl.control & CTL_INFO) != 0)
4436 {
4437 int rc;
4438 void *nametable;
4439 uint8_t *start_bits;
4440 BOOL heap_limit_set, match_limit_set, depth_limit_set;
4441 uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
4442 hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
4443 depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
4444 newline_convention;
4445
4446 /* Exercise the error route. */
4447
4448 PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
4449 (void)rc;
4450
4451 /* These info requests may return PCRE2_ERROR_UNSET. */
4452
4453 switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
4454 {
4455 case 0:
4456 heap_limit_set = TRUE;
4457 break;
4458
4459 case PCRE2_ERROR_UNSET:
4460 heap_limit_set = FALSE;
4461 break;
4462
4463 default:
4464 return PR_ABEND;
4465 }
4466
4467 switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
4468 {
4469 case 0:
4470 match_limit_set = TRUE;
4471 break;
4472
4473 case PCRE2_ERROR_UNSET:
4474 match_limit_set = FALSE;
4475 break;
4476
4477 default:
4478 return PR_ABEND;
4479 }
4480
4481 switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
4482 {
4483 case 0:
4484 depth_limit_set = TRUE;
4485 break;
4486
4487 case PCRE2_ERROR_UNSET:
4488 depth_limit_set = FALSE;
4489 break;
4490
4491 default:
4492 return PR_ABEND;
4493 }
4494
4495 /* These info requests should always succeed. */
4496
4497 if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
4498 pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
4499 pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
4500 pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
4501 pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
4502 pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
4503 pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
4504 pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
4505 pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
4506 pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
4507 pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
4508 pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
4509 pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
4510 pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
4511 pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
4512 pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
4513 pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
4514 != 0)
4515 return PR_ABEND;
4516
4517 fprintf(outfile, "Capture group count = %d\n", capture_count);
4518
4519 if (backrefmax > 0)
4520 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4521
4522 if (maxlookbehind > 0)
4523 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4524
4525 if (heap_limit_set)
4526 fprintf(outfile, "Heap limit = %u\n", heap_limit);
4527
4528 if (match_limit_set)
4529 fprintf(outfile, "Match limit = %u\n", match_limit);
4530
4531 if (depth_limit_set)
4532 fprintf(outfile, "Depth limit = %u\n", depth_limit);
4533
4534 if (namecount > 0)
4535 {
4536 fprintf(outfile, "Named capture groups:\n");
4537 for (; namecount > 0; namecount--)
4538 {
4539 int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
4540 uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
4541 fprintf(outfile, " ");
4542
4543 /* In UTF mode the name may be a UTF string containing non-ASCII
4544 letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
4545 use the normal string printing functions, which use escapes for all
4546 non-ASCII characters. */
4547
4548 if (utf)
4549 {
4550#ifdef SUPPORT_PCRE2_32
4551 if (test_mode == PCRE32_MODE)
4552 {
4553 PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
4554 while (*nameptr != 0)
4555 {
4556 uint8_t u8buff[6];
4557 int len = ord2utf8(*nameptr++, u8buff);
4558 fprintf(outfile, "%.*s", len, u8buff);
4559 }
4560 }
4561#endif
4562#ifdef SUPPORT_PCRE2_16
4563 if (test_mode == PCRE16_MODE)
4564 {
4565 PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
4566 while (*nameptr != 0)
4567 {
4568 int len;
4569 uint8_t u8buff[6];
4570 uint32_t c = *nameptr++ & 0xffff;
4571 if (c >= 0xD800 && c < 0xDC00)
4572 c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
4573 len = ord2utf8(c, u8buff);
4574 fprintf(outfile, "%.*s", len, u8buff);
4575 }
4576 }
4577#endif
4578#ifdef SUPPORT_PCRE2_8
4579 if (test_mode == PCRE8_MODE)
4580 fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
4581#endif
4582 }
4583 else /* Not UTF mode */
4584 {
4585 PCHARSV(nametable, imm2_size, length, FALSE, outfile);
4586 }
4587
4588 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4589
4590#ifdef SUPPORT_PCRE2_32
4591 if (test_mode == PCRE32_MODE)
4592 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
4593#endif
4594#ifdef SUPPORT_PCRE2_16
4595 if (test_mode == PCRE16_MODE)
4596 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
4597#endif
4598#ifdef SUPPORT_PCRE2_8
4599 if (test_mode == PCRE8_MODE)
4600 fprintf(outfile, "%3d\n", (int)(
4601 ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
4602#endif
4603
4604 nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
4605 }
4606 }
4607
4608 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4609 if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
4610 if (match_empty) fprintf(outfile, "May match empty string\n");
4611
4612 pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
4613 pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
4614 pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
4615
4616 /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
4617 cluttering up the verification output of non-UTF test files. */
4618
4619 if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
4620 {
4621 compile_options &= ~PCRE2_NEVER_UTF;
4622 overall_options &= ~PCRE2_NEVER_UTF;
4623 }
4624
4625 if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
4626 {
4627 compile_options &= ~PCRE2_NEVER_UCP;
4628 overall_options &= ~PCRE2_NEVER_UCP;
4629 }
4630
4631 if ((compile_options|overall_options) != 0)
4632 {
4633 if (compile_options == overall_options)
4634 show_compile_options(compile_options, "Options:", "\n");
4635 else
4636 {
4637 show_compile_options(compile_options, "Compile options:", "\n");
4638 show_compile_options(overall_options, "Overall options:", "\n");
4639 }
4640 }
4641
4642 if (extra_options != 0)
4643 show_compile_extra_options(extra_options, "Extra options:", "\n");
4644
4645 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4646
4647 if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
4648 (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4649 fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4650 "any Unicode newline" : "CR, LF, or CRLF");
4651
4652 if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4653 {
4654 switch (newline_convention)
4655 {
4656 case PCRE2_NEWLINE_CR:
4657 fprintf(outfile, "Forced newline is CR\n");
4658 break;
4659
4660 case PCRE2_NEWLINE_LF:
4661 fprintf(outfile, "Forced newline is LF\n");
4662 break;
4663
4664 case PCRE2_NEWLINE_CRLF:
4665 fprintf(outfile, "Forced newline is CRLF\n");
4666 break;
4667
4668 case PCRE2_NEWLINE_ANYCRLF:
4669 fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4670 break;
4671
4672 case PCRE2_NEWLINE_ANY:
4673 fprintf(outfile, "Forced newline is any Unicode newline\n");
4674 break;
4675
4676 case PCRE2_NEWLINE_NUL:
4677 fprintf(outfile, "Forced newline is NUL\n");
4678 break;
4679
4680 default:
4681 break;
4682 }
4683 }
4684
4685 if (first_ctype == 2)
4686 {
4687 fprintf(outfile, "First code unit at start or follows newline\n");
4688 }
4689 else if (first_ctype == 1)
4690 {
4691 const char *caseless =
4692 ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4693 "" : " (caseless)";
4694 if (PRINTOK(first_cunit))
4695 fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4696 else
4697 {
4698 fprintf(outfile, "First code unit = ");
4699 pchar(first_cunit, FALSE, outfile);
4700 fprintf(outfile, "%s\n", caseless);
4701 }
4702 }
4703 else if (start_bits != NULL)
4704 {
4705 int i;
4706 int c = 24;
4707 fprintf(outfile, "Starting code units: ");
4708 for (i = 0; i < 256; i++)
4709 {
4710 if ((start_bits[i/8] & (1u << (i&7))) != 0)
4711 {
4712 if (c > 75)
4713 {
4714 fprintf(outfile, "\n ");
4715 c = 2;
4716 }
4717 if (PRINTOK(i) && i != ' ')
4718 {
4719 fprintf(outfile, "%c ", i);
4720 c += 2;
4721 }
4722 else
4723 {
4724 fprintf(outfile, "\\x%02x ", i);
4725 c += 5;
4726 }
4727 }
4728 }
4729 fprintf(outfile, "\n");
4730 }
4731
4732 if (last_ctype != 0)
4733 {
4734 const char *caseless =
4735 ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4736 "" : " (caseless)";
4737 if (PRINTOK(last_cunit))
4738 fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4739 else
4740 {
4741 fprintf(outfile, "Last code unit = ");
4742 pchar(last_cunit, FALSE, outfile);
4743 fprintf(outfile, "%s\n", caseless);
4744 }
4745 }
4746
4747 if ((FLD(compiled_code, overall_options) & PCRE2_NO_START_OPTIMIZE) == 0)
4748 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4749
4750 if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4751 {
4752 if (FLD(compiled_code, executable_jit) != NULL)
4753 fprintf(outfile, "JIT compilation was successful\n");
4754 else
4755 {
4756#ifdef SUPPORT_JIT
4757 fprintf(outfile, "JIT compilation was not successful");
4758 if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
4759 return PR_ABEND;
4760 fprintf(outfile, "\n");
4761#else
4762 fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4763#endif
4764 }
4765 }
4766 }
4767
4768if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4769 {
4770 int errorcode;
4771 PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4772 if (errorcode != 0)
4773 {
4774 fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4775 if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
4776 return PR_ABEND;
4777 return PR_SKIP;
4778 }
4779 }
4780
4781return PR_OK;
4782}
4783
4784
4785
4786/*************************************************
4787* Handle serialization error *
4788*************************************************/
4789
4790/* Print an error message after a serialization failure.
4791
4792Arguments:
4793 rc the error code
4794 msg an initial message for what failed
4795
4796Returns: FALSE if print_error_message() fails
4797*/
4798
4799static BOOL
4800serial_error(int rc, const char *msg)
4801{
4802fprintf(outfile, "%s failed: error %d: ", msg, rc);
4803return print_error_message(rc, "", "\n");
4804}
4805
4806
4807
4808/*************************************************
4809* Open file for save/load commands *
4810*************************************************/
4811
4812/* This function decodes the file name and opens the file.
4813
4814Arguments:
4815 buffptr point after the #command
4816 mode open mode
4817 fptr points to the FILE variable
4818 name name of # command
4819
4820Returns: PR_OK or PR_ABEND
4821*/
4822
4823static int
4824open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
4825{
4826char *endf;
4827char *filename = (char *)buffptr;
4828while (isspace(*filename)) filename++;
4829endf = filename + strlen8(filename);
4830while (endf > filename && isspace(endf[-1])) endf--;
4831
4832if (endf == filename)
4833 {
4834 fprintf(outfile, "** File name expected after %s\n", name);
4835 return PR_ABEND;
4836 }
4837
4838*endf = 0;
4839*fptr = fopen((const char *)filename, mode);
4840if (*fptr == NULL)
4841 {
4842 fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4843 return PR_ABEND;
4844 }
4845
4846return PR_OK;
4847}
4848
4849
4850
4851/*************************************************
4852* Process command line *
4853*************************************************/
4854
4855/* This function is called for lines beginning with # and a character that is
4856not ! or whitespace, when encountered between tests, which means that there is
4857no compiled pattern (compiled_code is NULL). The line is in buffer.
4858
4859Arguments: none
4860
4861Returns: PR_OK continue processing next line
4862 PR_SKIP skip to a blank line
4863 PR_ABEND abort the pcre2test run
4864*/
4865
4866static int
4867process_command(void)
4868{
4869FILE *f;
4870PCRE2_SIZE serial_size;
4871size_t i;
4872int rc, cmd, cmdlen, yield;
4873uint16_t first_listed_newline;
4874const char *cmdname;
4875uint8_t *argptr, *serial;
4876
4877yield = PR_OK;
4878cmd = CMD_UNKNOWN;
4879cmdlen = 0;
4880
4881for (i = 0; i < cmdlistcount; i++)
4882 {
4883 cmdname = cmdlist[i].name;
4884 cmdlen = strlen(cmdname);
4885 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4886 isspace(buffer[cmdlen+1]))
4887 {
4888 cmd = cmdlist[i].value;
4889 break;
4890 }
4891 }
4892
4893argptr = buffer + cmdlen + 1;
4894
4895if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
4896 {
4897 fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
4898 return PR_ABEND;
4899 }
4900
4901switch(cmd)
4902 {
4903 case CMD_UNKNOWN:
4904 fprintf(outfile, "** Unknown command: %s", buffer);
4905 break;
4906
4907 case CMD_FORBID_UTF:
4908 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4909 break;
4910
4911 case CMD_PERLTEST:
4912 restrict_for_perl_test = TRUE;
4913 break;
4914
4915 /* Set default pattern modifiers */
4916
4917 case CMD_PATTERN:
4918 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4919 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4920 def_patctl.jit = JIT_DEFAULT;
4921 break;
4922
4923 /* Set default subject modifiers */
4924
4925 case CMD_SUBJECT:
4926 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4927 break;
4928
4929 /* Check the default newline, and if not one of those listed, set up the
4930 first one to be forced. An empty list unsets. */
4931
4932 case CMD_NEWLINE_DEFAULT:
4933 local_newline_default = 0; /* Unset */
4934 first_listed_newline = 0;
4935 for (;;)
4936 {
4937 while (isspace(*argptr)) argptr++;
4938 if (*argptr == 0) break;
4939 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4940 {
4941 size_t nlen = strlen(newlines[i]);
4942 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4943 isspace(argptr[nlen]))
4944 {
4945 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
4946 if (first_listed_newline == 0) first_listed_newline = i;
4947 }
4948 }
4949 while (*argptr != 0 && !isspace(*argptr)) argptr++;
4950 }
4951 local_newline_default = first_listed_newline;
4952 break;
4953
4954 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4955 the compiled pattern (e.g. to give information) are permitted. The default
4956 pattern modifiers are ignored. */
4957
4958 case CMD_POP:
4959 case CMD_POPCOPY:
4960 if (patstacknext <= 0)
4961 {
4962 fprintf(outfile, "** Can't pop off an empty stack\n");
4963 return PR_SKIP;
4964 }
4965 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */
4966 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4967 return PR_SKIP;
4968
4969 if (cmd == CMD_POP)
4970 {
4971 SET(compiled_code, patstack[--patstacknext]);
4972 }
4973 else
4974 {
4975 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4976 }
4977
4978 if (pat_patctl.jit != 0)
4979 {
4980 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4981 }
4982 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4983 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4984 if ((pat_patctl.control & CTL_ANYINFO) != 0)
4985 {
4986 rc = show_pattern_info();
4987 if (rc != PR_OK) return rc;
4988 }
4989 break;
4990
4991 /* Save the stack of compiled patterns to a file, then empty the stack. */
4992
4993 case CMD_SAVE:
4994 if (patstacknext <= 0)
4995 {
4996 fprintf(outfile, "** No stacked patterns to save\n");
4997 return PR_OK;
4998 }
4999
5000 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
5001 if (rc != PR_OK) return rc;
5002
5003 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
5004 general_context);
5005 if (rc < 0)
5006 {
5007 fclose(f);
5008 if (!serial_error(rc, "Serialization")) return PR_ABEND;
5009 break;
5010 }
5011
5012 /* Write the length at the start of the file to make it straightforward to
5013 get the right memory when re-loading. This saves having to read the file size
5014 in different operating systems. To allow for different endianness (even
5015 though reloading with the opposite endianness does not work), write the
5016 length byte-by-byte. */
5017
5018 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
5019 if (fwrite(serial, 1, serial_size, f) != serial_size)
5020 {
5021 fprintf(outfile, "** Wrong return from fwrite()\n");
5022 fclose(f);
5023 return PR_ABEND;
5024 }
5025
5026 fclose(f);
5027 PCRE2_SERIALIZE_FREE(serial);
5028 while(patstacknext > 0)
5029 {
5030 SET(compiled_code, patstack[--patstacknext]);
5031 SUB1(pcre2_code_free, compiled_code);
5032 }
5033 SET(compiled_code, NULL);
5034 break;
5035
5036 /* Load a set of compiled patterns from a file onto the stack */
5037
5038 case CMD_LOAD:
5039 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
5040 if (rc != PR_OK) return rc;
5041
5042 serial_size = 0;
5043 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
5044
5045 serial = malloc(serial_size);
5046 if (serial == NULL)
5047 {
5048 fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
Elliott Hughes16619d62021-10-29 12:10:38 -07005049 serial_size);
Elliott Hughes5b808042021-10-01 10:56:10 -07005050 fclose(f);
5051 return PR_ABEND;
5052 }
5053
5054 i = fread(serial, 1, serial_size, f);
5055 fclose(f);
5056
5057 if (i != serial_size)
5058 {
5059 fprintf(outfile, "** Wrong return from fread()\n");
5060 yield = PR_ABEND;
5061 }
5062 else
5063 {
5064 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
5065 if (rc < 0)
5066 {
5067 if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
5068 }
5069 else
5070 {
5071 if (rc + patstacknext > PATSTACKSIZE)
5072 {
5073 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
5074 rc, (rc == 1)? "" : "s");
5075 rc = PATSTACKSIZE - patstacknext;
5076 fprintf(outfile, "** Decoding %d pattern%s\n", rc,
5077 (rc == 1)? "" : "s");
5078 }
5079 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
5080 general_context);
5081 if (rc < 0)
5082 {
5083 if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
5084 }
5085 else patstacknext += rc;
5086 }
5087 }
5088
5089 free(serial);
5090 break;
5091
5092 /* Load a set of binary tables into tables3. */
5093
5094 case CMD_LOADTABLES:
5095 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
5096 if (rc != PR_OK) return rc;
5097
5098 if (tables3 == NULL)
5099 {
5100 (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
5101 tables3 = malloc(loadtables_length);
5102 }
5103
5104 if (tables3 == NULL)
5105 {
5106 fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
5107 yield = PR_ABEND;
5108 }
5109 else if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
5110 {
5111 fprintf(outfile, "** Wrong return from fread()\n");
5112 yield = PR_ABEND;
5113 }
5114
5115 fclose(f);
5116 break;
5117 }
5118
5119return yield;
5120}
5121
5122
5123
5124/*************************************************
5125* Process pattern line *
5126*************************************************/
5127
5128/* This function is called when the input buffer contains the start of a
5129pattern. The first character is known to be a valid delimiter. The pattern is
5130read, modifiers are interpreted, and a suitable local context is set up for
5131this test. The pattern is then compiled.
5132
5133Arguments: none
5134
5135Returns: PR_OK continue processing next line
5136 PR_SKIP skip to a blank line
5137 PR_ABEND abort the pcre2test run
5138*/
5139
5140static int
5141process_pattern(void)
5142{
5143BOOL utf;
5144uint32_t k;
5145uint8_t *p = buffer;
5146unsigned int delimiter = *p++;
5147int errorcode;
5148void *use_pat_context;
5149uint32_t use_forbid_utf = forbid_utf;
5150PCRE2_SIZE patlen;
5151PCRE2_SIZE valgrind_access_length;
5152PCRE2_SIZE erroroffset;
5153
5154/* The perltest.sh script supports only / as a delimiter. */
5155
5156if (restrict_for_perl_test && delimiter != '/')
5157 {
5158 fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
5159 return PR_ABEND;
5160 }
5161
5162/* Initialize the context and pattern/data controls for this test from the
5163defaults. */
5164
5165PATCTXCPY(pat_context, default_pat_context);
5166memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
5167
5168/* Find the end of the pattern, reading more lines if necessary. */
5169
5170for(;;)
5171 {
5172 while (*p != 0)
5173 {
5174 if (*p == '\\' && p[1] != 0) p++;
5175 else if (*p == delimiter) break;
5176 p++;
5177 }
5178 if (*p != 0) break;
5179 if ((p = extend_inputline(infile, p, " > ")) == NULL)
5180 {
5181 fprintf(outfile, "** Unexpected EOF\n");
5182 return PR_ABEND;
5183 }
5184 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
5185 }
5186
5187/* If the first character after the delimiter is backslash, make the pattern
5188end with backslash. This is purely to provide a way of testing for the error
5189message when a pattern ends with backslash. */
5190
5191if (p[1] == '\\') *p++ = '\\';
5192
5193/* Terminate the pattern at the delimiter, and compute the length. */
5194
5195*p++ = 0;
5196patlen = p - buffer - 2;
5197
5198/* Look for modifiers and options after the final delimiter. */
5199
5200if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5201
5202/* Note that the match_invalid_utf option also sets utf when passed to
5203pcre2_compile(). */
5204
5205utf = (pat_patctl.options & (PCRE2_UTF|PCRE2_MATCH_INVALID_UTF)) != 0;
5206
5207/* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5208exclusive with the utf modifier. */
5209
5210if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5211 {
5212 if (test_mode == PCRE8_MODE)
5213 {
5214 fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5215 return PR_SKIP;
5216 }
5217 if (utf)
5218 {
5219 fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5220 return PR_SKIP;
5221 }
5222 }
5223
5224/* The convert and posix modifiers are mutually exclusive. */
5225
5226if (pat_patctl.convert_type != CONVERT_UNSET &&
5227 (pat_patctl.control & CTL_POSIX) != 0)
5228 {
5229 fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5230 return PR_SKIP;
5231 }
5232
5233/* Check for mutually exclusive control modifiers. At present, these are all in
5234the first control word. */
5235
5236for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5237 {
5238 uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5239 if (c != 0 && c != (c & (~c+1)))
5240 {
5241 show_controls(c, 0, "** Not allowed together:");
5242 fprintf(outfile, "\n");
5243 return PR_SKIP;
5244 }
5245 }
5246
5247/* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5248specified. */
5249
5250if (pat_patctl.jit == 0 &&
5251 (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5252 pat_patctl.jit = JIT_DEFAULT;
5253
5254/* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5255in callouts. Convert from hex if requested (literal strings in quotes may be
5256present within the hexadecimal pairs). The result must necessarily be fewer
5257characters so will always fit in pbuffer8. */
5258
5259if ((pat_patctl.control & CTL_HEXPAT) != 0)
5260 {
5261 uint8_t *pp, *pt;
5262 uint32_t c, d;
5263
5264 pt = pbuffer8;
5265 for (pp = buffer + 1; *pp != 0; pp++)
5266 {
5267 if (isspace(*pp)) continue;
5268 c = *pp++;
5269
5270 /* Handle a literal substring */
5271
5272 if (c == '\'' || c == '"')
5273 {
5274 uint8_t *pq = pp;
5275 for (;; pp++)
5276 {
5277 d = *pp;
5278 if (d == 0)
5279 {
5280 fprintf(outfile, "** Missing closing quote in hex pattern: "
5281 "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5282 return PR_SKIP;
5283 }
5284 if (d == c) break;
5285 *pt++ = d;
5286 }
5287 }
5288
5289 /* Expect a hex pair */
5290
5291 else
5292 {
5293 if (!isxdigit(c))
5294 {
5295 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5296 PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5297 return PR_SKIP;
5298 }
5299 if (*pp == 0)
5300 {
5301 fprintf(outfile, "** Odd number of digits in hex pattern\n");
5302 return PR_SKIP;
5303 }
5304 d = *pp;
5305 if (!isxdigit(d))
5306 {
5307 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5308 PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5309 return PR_SKIP;
5310 }
5311 c = toupper(c);
5312 d = toupper(d);
5313 *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5314 (isdigit(d)? (d - '0') : (d - 'A' + 10));
5315 }
5316 }
5317 *pt = 0;
5318 patlen = pt - pbuffer8;
5319 }
5320
5321/* If not a hex string, process for repetition expansion if requested. */
5322
5323else if ((pat_patctl.control & CTL_EXPAND) != 0)
5324 {
5325 uint8_t *pp, *pt;
5326
5327 pt = pbuffer8;
5328 for (pp = buffer + 1; *pp != 0; pp++)
5329 {
5330 uint8_t *pc = pp;
5331 uint32_t count = 1;
5332 size_t length = 1;
5333
5334 /* Check for replication syntax; if not found, the defaults just set will
5335 prevail and one character will be copied. */
5336
5337 if (pp[0] == '\\' && pp[1] == '[')
5338 {
5339 uint8_t *pe;
5340 for (pe = pp + 2; *pe != 0; pe++)
5341 {
5342 if (pe[0] == ']' && pe[1] == '{')
5343 {
5344 uint32_t clen = pe - pc - 2;
5345 uint32_t i = 0;
5346 unsigned long uli;
5347 char *endptr;
5348
5349 pe += 2;
5350 uli = strtoul((const char *)pe, &endptr, 10);
5351 if (U32OVERFLOW(uli))
5352 {
5353 fprintf(outfile, "** Pattern repeat count too large\n");
5354 return PR_SKIP;
5355 }
5356
5357 i = (uint32_t)uli;
5358 pe = (uint8_t *)endptr;
5359 if (*pe == '}')
5360 {
5361 if (i == 0)
5362 {
5363 fprintf(outfile, "** Zero repeat not allowed\n");
5364 return PR_SKIP;
5365 }
5366 pc += 2;
5367 count = i;
5368 length = clen;
5369 pp = pe;
5370 break;
5371 }
5372 }
5373 }
5374 }
5375
5376 /* Add to output. If the buffer is too small expand it. The function for
5377 expanding buffers always keeps buffer and pbuffer8 in step as far as their
5378 size goes. */
5379
5380 while (pt + count * length > pbuffer8 + pbuffer8_size)
5381 {
5382 size_t pc_offset = pc - buffer;
5383 size_t pp_offset = pp - buffer;
5384 size_t pt_offset = pt - pbuffer8;
5385 expand_input_buffers();
5386 pc = buffer + pc_offset;
5387 pp = buffer + pp_offset;
5388 pt = pbuffer8 + pt_offset;
5389 }
5390
5391 for (; count > 0; count--)
5392 {
5393 memcpy(pt, pc, length);
5394 pt += length;
5395 }
5396 }
5397
5398 *pt = 0;
5399 patlen = pt - pbuffer8;
5400
5401 if ((pat_patctl.control & CTL_INFO) != 0)
5402 fprintf(outfile, "Expanded: %s\n", pbuffer8);
5403 }
5404
5405/* Neither hex nor expanded, just copy the input verbatim. */
5406
5407else
5408 {
5409 strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5410 }
5411
5412/* Sort out character tables */
5413
5414if (pat_patctl.locale[0] != 0)
5415 {
5416 if (pat_patctl.tables_id != 0)
5417 {
5418 fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5419 return PR_SKIP;
5420 }
5421 if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5422 {
5423 fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5424 return PR_SKIP;
5425 }
5426 if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5427 {
5428 strcpy((char *)locale_name, (char *)pat_patctl.locale);
5429 if (locale_tables != NULL) free((void *)locale_tables);
5430 PCRE2_MAKETABLES(locale_tables);
5431 }
5432 use_tables = locale_tables;
5433 }
5434
5435else switch (pat_patctl.tables_id)
5436 {
5437 case 0: use_tables = NULL; break;
5438 case 1: use_tables = tables1; break;
5439 case 2: use_tables = tables2; break;
5440
5441 case 3:
5442 if (tables3 == NULL)
5443 {
5444 fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
5445 "been loaded\n");
5446 return PR_SKIP;
5447 }
5448 use_tables = tables3;
5449 break;
5450
5451 default:
5452 fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
5453 return PR_SKIP;
5454 }
5455
5456PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5457
5458/* Set up for the stackguard test. */
5459
5460if (pat_patctl.stackguard_test != 0)
5461 {
5462 PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5463 }
5464
5465/* Handle compiling via the POSIX interface, which doesn't support the
5466timing, showing, or debugging options, nor the ability to pass over
5467local character tables. Neither does it have 16-bit or 32-bit support. */
5468
5469if ((pat_patctl.control & CTL_POSIX) != 0)
5470 {
5471#ifdef SUPPORT_PCRE2_8
5472 int rc;
5473 int cflags = 0;
5474 const char *msg = "** Ignored with POSIX interface:";
5475#endif
5476
5477 if (test_mode != PCRE8_MODE)
5478 {
5479 fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5480 return PR_SKIP;
5481 }
5482
5483#ifdef SUPPORT_PCRE2_8
5484 /* Check for features that the POSIX interface does not support. */
5485
5486 if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5487 if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5488 if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5489 if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5490 if (timeit > 0) prmsg(&msg, "timing");
5491 if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5492
5493 if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5494 {
5495 show_compile_options(
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07005496 pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS),
Elliott Hughes5b808042021-10-01 10:56:10 -07005497 msg, "");
5498 msg = "";
5499 }
5500
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07005501 if ((FLD(pat_context, extra_options) &
5502 (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0)
Elliott Hughes5b808042021-10-01 10:56:10 -07005503 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07005504 show_compile_extra_options(
5505 FLD(pat_context, extra_options) &
5506 (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, "");
5507 msg = "";
5508 }
5509
5510 if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 ||
5511 (pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0)
5512 {
5513 show_controls(
5514 pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS),
5515 pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2),
5516 msg);
Elliott Hughes5b808042021-10-01 10:56:10 -07005517 msg = "";
5518 }
5519
5520 if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5521 if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5522 prmsg(&msg, "max_pattern_length");
5523 if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5524 prmsg(&msg, "parens_nest_limit");
5525
5526 if (msg[0] == 0) fprintf(outfile, "\n");
5527
5528 /* Translate PCRE2 options to POSIX options and then compile. */
5529
5530 if (utf) cflags |= REG_UTF;
5531 if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5532 if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5533 if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5534 if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5535 if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5536 if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5537 if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5538
5539 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5540 {
5541 preg.re_endp = (char *)pbuffer8 + patlen;
5542 cflags |= REG_PEND;
5543 }
5544
5545 rc = regcomp(&preg, (char *)pbuffer8, cflags);
5546
5547 /* Compiling failed */
5548
5549 if (rc != 0)
5550 {
5551 size_t bsize, usize;
5552 int psize;
5553
5554 preg.re_pcre2_code = NULL; /* In case something was left in there */
5555 preg.re_match_data = NULL;
5556
5557 bsize = (pat_patctl.regerror_buffsize != 0)?
5558 pat_patctl.regerror_buffsize : pbuffer8_size;
5559 if (bsize + 8 < pbuffer8_size)
5560 memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5561 usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5562
5563 /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5564 versions of snprintf() put a zero byte at the end, but others do not.
5565 Therefore, we print a maximum of one less than the size of the buffer. */
5566
5567 psize = (int)bsize - 1;
5568 fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5569 if (usize > bsize)
5570 {
5571 fprintf(outfile, "** regerror() message truncated\n");
5572 if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5573 fprintf(outfile, "** regerror() buffer overflow\n");
5574 }
5575 return PR_SKIP;
5576 }
5577
5578 /* Compiling succeeded. Check that the values in the preg block are sensible.
5579 It can happen that pcre2test is accidentally linked with a different POSIX
5580 library which succeeds, but of course puts different things into preg. In
5581 this situation, calling regfree() may cause a segfault (or invalid free() in
5582 valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5583 calling of regfree() on exit. */
5584
5585 if (preg.re_pcre2_code == NULL ||
5586 ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5587 ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5588 preg.re_match_data == NULL ||
5589 preg.re_cflags != cflags)
5590 {
5591 fprintf(outfile,
5592 "** The regcomp() function returned zero (success), but the values set\n"
5593 "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5594 "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5595 "** some other POSIX regex library.\n**\n");
5596 preg.re_pcre2_code = NULL;
5597 return PR_ABEND;
5598 }
5599
5600 return PR_OK;
5601#endif /* SUPPORT_PCRE2_8 */
5602 }
5603
5604/* Handle compiling via the native interface. Controls that act later are
5605ignored with "push". Replacements are locked out. */
5606
5607if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5608 {
5609 if (pat_patctl.replacement[0] != 0)
5610 {
5611 fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5612 return PR_OK;
5613 }
5614 if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5615 (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5616 {
5617 show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5618 pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5619 "** Ignored when compiled pattern is stacked with 'push':");
5620 fprintf(outfile, "\n");
5621 }
5622 if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5623 (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5624 {
5625 show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5626 pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5627 "** Applies only to compile when pattern is stacked with 'push':");
5628 fprintf(outfile, "\n");
5629 }
5630 }
5631
5632/* Convert the input in non-8-bit modes. */
5633
5634errorcode = 0;
5635
5636#ifdef SUPPORT_PCRE2_16
5637if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5638#endif
5639
5640#ifdef SUPPORT_PCRE2_32
5641if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5642#endif
5643
5644switch(errorcode)
5645 {
5646 case -1:
5647 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5648 "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5649 return PR_SKIP;
5650
5651 case -2:
5652 fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5653 "cannot be converted to UTF\n");
5654 return PR_SKIP;
5655
5656 case -3:
5657 fprintf(outfile, "** Failed: character value greater than 0xffff "
5658 "cannot be converted to 16-bit in non-UTF mode\n");
5659 return PR_SKIP;
5660
5661 default:
5662 break;
5663 }
5664
5665/* The pattern is now in pbuffer[8|16|32], with the length in code units in
5666patlen. If it is to be converted, copy the result back afterwards so that it
5667ends up back in the usual place. */
5668
5669if (pat_patctl.convert_type != CONVERT_UNSET)
5670 {
5671 int rc;
5672 int convert_return = PR_OK;
5673 uint32_t convert_options = pat_patctl.convert_type;
5674 void *converted_pattern;
5675 PCRE2_SIZE converted_length;
5676
5677 if (pat_patctl.convert_length != 0)
5678 {
5679 converted_length = pat_patctl.convert_length;
5680 converted_pattern = malloc(converted_length * code_unit_size);
5681 if (converted_pattern == NULL)
5682 {
5683 fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5684 return PR_SKIP;
5685 }
5686 }
5687 else converted_pattern = NULL; /* Let the library allocate */
5688
5689 if (utf) convert_options |= PCRE2_CONVERT_UTF;
5690 if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5691 convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5692
5693 CONCTXCPY(con_context, default_con_context);
5694
5695 if (pat_patctl.convert_glob_escape != 0)
5696 {
5697 uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5698 pat_patctl.convert_glob_escape;
5699 PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5700 if (rc != 0)
5701 {
5702 fprintf(outfile, "** Invalid glob escape '%c'\n",
5703 pat_patctl.convert_glob_escape);
5704 convert_return = PR_SKIP;
5705 goto CONVERT_FINISH;
5706 }
5707 }
5708
5709 if (pat_patctl.convert_glob_separator != 0)
5710 {
5711 PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5712 if (rc != 0)
5713 {
5714 fprintf(outfile, "** Invalid glob separator '%c'\n",
5715 pat_patctl.convert_glob_separator);
5716 convert_return = PR_SKIP;
5717 goto CONVERT_FINISH;
5718 }
5719 }
5720
5721 PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5722 &converted_pattern, &converted_length, con_context);
5723
5724 if (rc != 0)
5725 {
5726 fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
Elliott Hughes16619d62021-10-29 12:10:38 -07005727 converted_length);
Elliott Hughes5b808042021-10-01 10:56:10 -07005728 convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5729 }
5730
5731 /* Output the converted pattern, then copy it. */
5732
5733 else
5734 {
5735 PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5736 fprintf(outfile, "\n");
5737 patlen = converted_length;
5738 CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5739 }
5740
5741 /* Free the converted pattern. */
5742
5743 CONVERT_FINISH:
5744 if (pat_patctl.convert_length != 0)
5745 free(converted_pattern);
5746 else
5747 PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5748
5749 /* Return if conversion was unsuccessful. */
5750
5751 if (convert_return != PR_OK) return convert_return;
5752 }
5753
5754/* By default we pass a zero-terminated pattern, but a length is passed if
5755"use_length" was specified or this is a hex pattern (which might contain binary
5756zeros). When valgrind is supported, arrange for the unused part of the buffer
5757to be marked as no access. */
5758
5759valgrind_access_length = patlen;
5760if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5761 {
5762 patlen = PCRE2_ZERO_TERMINATED;
5763 valgrind_access_length += 1; /* For the terminating zero */
5764 }
5765
5766#ifdef SUPPORT_VALGRIND
5767#ifdef SUPPORT_PCRE2_8
5768if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5769 {
5770 VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5771 pbuffer8_size - valgrind_access_length);
5772 }
5773#endif
5774#ifdef SUPPORT_PCRE2_16
5775if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5776 {
5777 VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5778 pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5779 }
5780#endif
5781#ifdef SUPPORT_PCRE2_32
5782if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5783 {
5784 VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5785 pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5786 }
5787#endif
5788#else /* Valgrind not supported */
5789(void)valgrind_access_length; /* Avoid compiler warning */
5790#endif
5791
5792/* If #newline_default has been used and the library was not compiled with an
5793appropriate default newline setting, local_newline_default will be non-zero. We
5794use this if there is no explicit newline modifier. */
5795
5796if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5797 {
5798 SETFLD(pat_context, newline_convention, local_newline_default);
5799 }
5800
5801/* The null_context modifier is used to test calling pcre2_compile() with a
5802NULL context. */
5803
5804use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5805 NULL : PTR(pat_context);
5806
5807/* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5808and PCRE2_NEVER_UCP are invalid with it. */
5809
5810if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5811
5812/* Compile many times when timing. */
5813
5814if (timeit > 0)
5815 {
5816 int i;
5817 clock_t time_taken = 0;
5818 for (i = 0; i < timeit; i++)
5819 {
5820 clock_t start_time = clock();
5821 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5822 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5823 use_pat_context);
5824 time_taken += clock() - start_time;
5825 if (TEST(compiled_code, !=, NULL))
5826 { SUB1(pcre2_code_free, compiled_code); }
5827 }
5828 total_compile_time += time_taken;
5829 fprintf(outfile, "Compile time %.4f milliseconds\n",
5830 (((double)time_taken * 1000.0) / (double)timeit) /
5831 (double)CLOCKS_PER_SEC);
5832 }
5833
5834/* A final compile that is used "for real". */
5835
5836PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
5837 &errorcode, &erroroffset, use_pat_context);
5838
5839/* Call the JIT compiler if requested. When timing, we must free and recompile
5840the pattern each time because that is the only way to free the JIT compiled
5841code. We know that compilation will always succeed. */
5842
5843if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5844 {
5845 if (timeit > 0)
5846 {
5847 int i;
5848 clock_t time_taken = 0;
5849
5850 for (i = 0; i < timeit; i++)
5851 {
5852 clock_t start_time;
5853 SUB1(pcre2_code_free, compiled_code);
5854 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5855 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5856 use_pat_context);
5857 start_time = clock();
5858 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5859 time_taken += clock() - start_time;
5860 }
5861 total_jit_compile_time += time_taken;
5862 fprintf(outfile, "JIT compile %.4f milliseconds\n",
5863 (((double)time_taken * 1000.0) / (double)timeit) /
5864 (double)CLOCKS_PER_SEC);
5865 }
5866 else
5867 {
5868 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5869 }
5870 }
5871
5872/* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5873and 32-bit buffers can be marked completely undefined, but we must leave the
5874pattern in the 8-bit buffer defined because it may be read from a callout
5875during matching. */
5876
5877#ifdef SUPPORT_VALGRIND
5878#ifdef SUPPORT_PCRE2_8
5879if (test_mode == PCRE8_MODE)
5880 {
5881 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5882 pbuffer8_size - valgrind_access_length);
5883 }
5884#endif
5885#ifdef SUPPORT_PCRE2_16
5886if (test_mode == PCRE16_MODE)
5887 {
5888 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5889 }
5890#endif
5891#ifdef SUPPORT_PCRE2_32
5892if (test_mode == PCRE32_MODE)
5893 {
5894 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5895 }
5896#endif
5897#endif
5898
5899/* Compilation failed; go back for another re, skipping to blank line
5900if non-interactive. */
5901
5902if (TEST(compiled_code, ==, NULL))
5903 {
5904 fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5905 (int)erroroffset);
5906 if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
5907 return PR_SKIP;
5908 }
5909
5910/* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5911locked out at compile time, but we must also check for occurrences of \P, \p,
5912and \X, which are only supported when Unicode is supported. */
5913
5914if (forbid_utf != 0)
5915 {
5916 if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5917 {
5918 fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5919 "#forbid_utf command\n");
5920 return PR_SKIP;
5921 }
5922 }
5923
5924/* Remember the maximum lookbehind, for partial matching. */
5925
5926if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5927 return PR_ABEND;
5928
5929/* Remember the number of captures. */
5930
5931if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
5932 return PR_ABEND;
5933
5934/* If an explicit newline modifier was given, set the information flag in the
5935pattern so that it is preserved over push/pop. */
5936
5937if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
5938 {
5939 SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5940 }
5941
5942/* Output code size and other information if requested. */
5943
5944if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5945if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
5946if ((pat_patctl.control & CTL_ANYINFO) != 0)
5947 {
5948 int rc = show_pattern_info();
5949 if (rc != PR_OK) return rc;
5950 }
5951
5952/* The "push" control requests that the compiled pattern be remembered on a
5953stack. This is mainly for testing the serialization functionality. */
5954
5955if ((pat_patctl.control & CTL_PUSH) != 0)
5956 {
5957 if (patstacknext >= PATSTACKSIZE)
5958 {
5959 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5960 return PR_ABEND;
5961 }
5962 patstack[patstacknext++] = PTR(compiled_code);
5963 SET(compiled_code, NULL);
5964 }
5965
5966/* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5967copy of the pattern, the latter with a copy of its character tables. This tests
5968the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5969
5970if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5971 {
5972 if (patstacknext >= PATSTACKSIZE)
5973 {
5974 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5975 return PR_ABEND;
5976 }
5977 if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5978 {
5979 PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5980 }
5981 else
5982 {
5983 PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
5984 compiled_code); }
5985 }
5986
5987return PR_OK;
5988}
5989
5990
5991
5992/*************************************************
5993* Check heap, match or depth limit *
5994*************************************************/
5995
5996/* This is used for DFA, normal, and JIT fast matching. For DFA matching it
5997should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
5998
5999Arguments:
6000 pp the subject string
6001 ulen length of subject or PCRE2_ZERO_TERMINATED
6002 errnumber defines which limit to test
6003 msg string to include in final message
6004
6005Returns: the return from the final match function call
6006*/
6007
6008static int
6009check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
6010{
6011int capcount;
6012uint32_t min = 0;
6013uint32_t mid = 64;
6014uint32_t max = UINT32_MAX;
6015
6016PCRE2_SET_MATCH_LIMIT(dat_context, max);
6017PCRE2_SET_DEPTH_LIMIT(dat_context, max);
6018PCRE2_SET_HEAP_LIMIT(dat_context, max);
6019
6020for (;;)
6021 {
6022 uint32_t stack_start = 0;
6023
6024 if (errnumber == PCRE2_ERROR_HEAPLIMIT)
6025 {
6026 PCRE2_SET_HEAP_LIMIT(dat_context, mid);
6027 }
6028 else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
6029 {
6030 PCRE2_SET_MATCH_LIMIT(dat_context, mid);
6031 }
6032 else
6033 {
6034 PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
6035 }
6036
6037 if ((dat_datctl.control & CTL_DFA) != 0)
6038 {
6039 stack_start = DFA_START_RWS_SIZE/1024;
6040 if (dfa_workspace == NULL)
6041 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6042 if (dfa_matched++ == 0)
6043 dfa_workspace[0] = -1; /* To catch bad restart */
6044 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6045 dat_datctl.options, match_data,
6046 PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
6047 }
6048
6049 else if ((pat_patctl.control & CTL_JITFAST) != 0)
6050 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6051 dat_datctl.options, match_data, PTR(dat_context));
6052
6053 else
6054 {
6055 stack_start = START_FRAMES_SIZE/1024;
6056 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6057 dat_datctl.options, match_data, PTR(dat_context));
6058 }
6059
6060 if (capcount == errnumber)
6061 {
6062 if ((mid & 0x80000000u) != 0)
6063 {
6064 fprintf(outfile, "Can't find minimum %s limit: check pattern for "
6065 "restriction\n", msg);
6066 break;
6067 }
6068
6069 min = mid;
6070 mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
6071 }
6072 else if (capcount >= 0 ||
6073 capcount == PCRE2_ERROR_NOMATCH ||
6074 capcount == PCRE2_ERROR_PARTIAL)
6075 {
6076 /* If we've not hit the error with a heap limit less than the size of the
6077 initial stack frame vector (for pcre2_match()) or the initial stack
6078 workspace vector (for pcre2_dfa_match()), the heap is not being used, so
6079 the minimum limit is zero; there's no need to go on. The other limits are
6080 always greater than zero. */
6081
6082 if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
6083 {
6084 fprintf(outfile, "Minimum %s limit = 0\n", msg);
6085 break;
6086 }
6087 if (mid == min + 1)
6088 {
6089 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
6090 break;
6091 }
6092 max = mid;
6093 mid = (min + max)/2;
6094 }
6095 else break; /* Some other error */
6096 }
6097
6098return capcount;
6099}
6100
6101
6102
6103/*************************************************
6104* Substitute callout function *
6105*************************************************/
6106
6107/* Called from pcre2_substitute() when the substitute_callout modifier is set.
6108Print out the data that is passed back. The substitute callout block is
6109identical for all code unit widths, so we just pick one.
6110
6111Arguments:
6112 scb pointer to substitute callout block
6113 data_ptr callout data
6114
6115Returns: nothing
6116*/
6117
6118static int
6119substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
6120 void *data_ptr)
6121{
6122int yield = 0;
6123BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6124(void)data_ptr; /* Not used */
6125
6126fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
6127 scb->subscount, scb->oveccount,
Elliott Hughes16619d62021-10-29 12:10:38 -07006128 scb->ovector[0], scb->ovector[1]);
Elliott Hughes5b808042021-10-01 10:56:10 -07006129
6130PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
6131 utf, outfile);
6132
6133fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
Elliott Hughes16619d62021-10-29 12:10:38 -07006134 scb->output_offsets[0], scb->output_offsets[1]);
Elliott Hughes5b808042021-10-01 10:56:10 -07006135
6136PCHARSV(scb->output, scb->output_offsets[0],
6137 scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
6138
6139if (scb->subscount == dat_datctl.substitute_stop)
6140 {
6141 yield = -1;
6142 fprintf(outfile, " STOPPED");
6143 }
6144else if (scb->subscount == dat_datctl.substitute_skip)
6145 {
6146 yield = +1;
6147 fprintf(outfile, " SKIPPED");
6148 }
6149
6150fprintf(outfile, "\"\n");
6151return yield;
6152}
6153
6154
6155/*************************************************
6156* Callout function *
6157*************************************************/
6158
6159/* Called from a PCRE2 library as a result of the (?C) item. We print out where
6160we are in the match (unless suppressed). Yield zero unless more callouts than
6161the fail count, or the callout data is not zero. The only differences in the
6162callout block for different code unit widths are that the pointers to the
6163subject, the most recent MARK, and a callout argument string point to strings
6164of the appropriate width. Casts can be used to deal with this.
6165
6166Arguments:
6167 cb a pointer to a callout block
6168 callout_data_ptr the provided callout data
6169
6170Returns: 0 or 1 or an error, as determined by settings
6171*/
6172
6173static int
6174callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
6175{
6176FILE *f, *fdefault;
6177uint32_t i, pre_start, post_start, subject_length;
6178PCRE2_SIZE current_position;
6179BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6180BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
6181BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
6182
6183/* The FILE f is used for echoing the subject string if it is non-NULL. This
6184happens only once in simple cases, but we want to repeat after any additional
6185output caused by CALLOUT_EXTRA. */
6186
6187fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
6188 NULL : outfile;
6189
6190if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
6191 {
6192 f = outfile;
6193 switch (cb->callout_flags)
6194 {
6195 case PCRE2_CALLOUT_BACKTRACK:
6196 fprintf(f, "Backtrack\n");
6197 break;
6198
6199 case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
6200 fprintf(f, "Backtrack\nNo other matching paths\n");
6201 /* Fall through */
6202
6203 case PCRE2_CALLOUT_STARTMATCH:
6204 fprintf(f, "New match attempt\n");
6205 break;
6206
6207 default:
6208 f = fdefault;
6209 break;
6210 }
6211 }
6212else f = fdefault;
6213
6214/* For a callout with a string argument, show the string first because there
6215isn't a tidy way to fit it in the rest of the data. */
6216
6217if (cb->callout_string != NULL)
6218 {
6219 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
6220 fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
Elliott Hughes16619d62021-10-29 12:10:38 -07006221 cb->callout_string_offset, delimiter);
Elliott Hughes5b808042021-10-01 10:56:10 -07006222 PCHARSV(cb->callout_string, 0,
6223 cb->callout_string_length, utf, outfile);
6224 for (i = 0; callout_start_delims[i] != 0; i++)
6225 if (delimiter == callout_start_delims[i])
6226 {
6227 delimiter = callout_end_delims[i];
6228 break;
6229 }
6230 fprintf(outfile, "%c", delimiter);
6231 if (!callout_capture) fprintf(outfile, "\n");
6232 }
6233
6234/* Show captured strings if required */
6235
6236if (callout_capture)
6237 {
6238 if (cb->callout_string == NULL)
6239 fprintf(outfile, "Callout %d:", cb->callout_number);
6240 fprintf(outfile, " last capture = %d\n", cb->capture_last);
6241 for (i = 2; i < cb->capture_top * 2; i += 2)
6242 {
6243 fprintf(outfile, "%2d: ", i/2);
6244 if (cb->offset_vector[i] == PCRE2_UNSET)
6245 fprintf(outfile, "<unset>");
6246 else
6247 {
6248 PCHARSV(cb->subject, cb->offset_vector[i],
6249 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
6250 }
6251 fprintf(outfile, "\n");
6252 }
6253 }
6254
6255/* Unless suppressed, re-print the subject in canonical form (with escapes for
6256non-printing characters), the first time, or if giving full details. On
6257subsequent calls in the same match, we use PCHARS() just to find the printed
6258lengths of the substrings. */
6259
6260if (callout_where)
6261 {
6262 if (f != NULL) fprintf(f, "--->");
6263
6264 /* The subject before the match start. */
6265
6266 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
6267
6268 /* If a lookbehind is involved, the current position may be earlier than the
6269 match start. If so, use the match start instead. */
6270
6271 current_position = (cb->current_position >= cb->start_match)?
6272 cb->current_position : cb->start_match;
6273
6274 /* The subject between the match start and the current position. */
6275
6276 PCHARS(post_start, cb->subject, cb->start_match,
6277 current_position - cb->start_match, utf, f);
6278
6279 /* Print from the current position to the end. */
6280
6281 PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6282 utf, f);
6283
6284 /* Calculate the total subject printed length (no print). */
6285
6286 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6287
6288 if (f != NULL) fprintf(f, "\n");
6289
6290 /* For automatic callouts, show the pattern offset. Otherwise, for a
6291 numerical callout whose number has not already been shown with captured
6292 strings, show the number here. A callout with a string argument has been
6293 displayed above. */
6294
6295 if (cb->callout_number == 255)
6296 {
6297 fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6298 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
6299 }
6300 else
6301 {
6302 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
6303 else fprintf(outfile, "%3d ", cb->callout_number);
6304 }
6305
6306 /* Now show position indicators */
6307
6308 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6309 fprintf(outfile, "^");
6310
6311 if (post_start > 0)
6312 {
6313 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6314 fprintf(outfile, "^");
6315 }
6316
6317 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6318 fprintf(outfile, " ");
6319
6320 if (cb->next_item_length != 0)
6321 fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6322 pbuffer8 + cb->pattern_position);
6323 else
6324 fprintf(outfile, "End of pattern");
6325
6326 fprintf(outfile, "\n");
6327 }
6328
6329first_callout = FALSE;
6330
6331/* Show any mark info */
6332
6333if (cb->mark != last_callout_mark)
6334 {
6335 if (cb->mark == NULL)
6336 fprintf(outfile, "Latest Mark: <unset>\n");
6337 else
6338 {
6339 fprintf(outfile, "Latest Mark: ");
6340 PCHARSV(cb->mark, -1, -1, utf, outfile);
6341 putc('\n', outfile);
6342 }
6343 last_callout_mark = cb->mark;
6344 }
6345
6346/* Show callout data */
6347
6348if (callout_data_ptr != NULL)
6349 {
6350 int callout_data = *((int32_t *)callout_data_ptr);
6351 if (callout_data != 0)
6352 {
6353 fprintf(outfile, "Callout data = %d\n", callout_data);
6354 return callout_data;
6355 }
6356 }
6357
6358/* Keep count and give the appropriate return code */
6359
6360callout_count++;
6361
6362if (cb->callout_number == dat_datctl.cerror[0] &&
6363 callout_count >= dat_datctl.cerror[1])
6364 return PCRE2_ERROR_CALLOUT;
6365
6366if (cb->callout_number == dat_datctl.cfail[0] &&
6367 callout_count >= dat_datctl.cfail[1])
6368 return 1;
6369
6370return 0;
6371}
6372
6373
6374
6375/*************************************************
6376* Handle *MARK and copy/get tests *
6377*************************************************/
6378
6379/* This function is called after complete and partial matches. It runs the
6380tests for substring extraction.
6381
6382Arguments:
6383 utf TRUE for utf
6384 capcount return from pcre2_match()
6385
6386Returns: FALSE if print_error_message() fails
6387*/
6388
6389static BOOL
6390copy_and_get(BOOL utf, int capcount)
6391{
6392int i;
6393uint8_t *nptr;
6394
6395/* Test copy strings by number */
6396
6397for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6398 {
6399 int rc;
6400 PCRE2_SIZE length, length2;
6401 uint32_t copybuffer[256];
6402 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6403 length = sizeof(copybuffer)/code_unit_size;
6404 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6405 if (rc < 0)
6406 {
6407 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6408 if (!print_error_message(rc, "", "\n")) return FALSE;
6409 }
6410 else
6411 {
6412 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6413 if (rc < 0)
6414 {
6415 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6416 if (!print_error_message(rc, "", "\n")) return FALSE;
6417 }
6418 else if (length2 != length)
6419 {
6420 fprintf(outfile, "Mismatched substring lengths: %"
Elliott Hughes16619d62021-10-29 12:10:38 -07006421 SIZ_FORM " %" SIZ_FORM "\n", length, length2);
Elliott Hughes5b808042021-10-01 10:56:10 -07006422 }
6423 fprintf(outfile, "%2dC ", n);
6424 PCHARSV(copybuffer, 0, length, utf, outfile);
Elliott Hughes16619d62021-10-29 12:10:38 -07006425 fprintf(outfile, " (%" SIZ_FORM ")\n", length);
Elliott Hughes5b808042021-10-01 10:56:10 -07006426 }
6427 }
6428
6429/* Test copy strings by name */
6430
6431nptr = dat_datctl.copy_names;
6432for (;;)
6433 {
6434 int rc;
6435 int groupnumber;
6436 PCRE2_SIZE length, length2;
6437 uint32_t copybuffer[256];
6438 int namelen = strlen((const char *)nptr);
6439#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6440 PCRE2_SIZE cnl = namelen;
6441#endif
6442 if (namelen == 0) break;
6443
6444#ifdef SUPPORT_PCRE2_8
6445 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6446#endif
6447#ifdef SUPPORT_PCRE2_16
6448 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6449#endif
6450#ifdef SUPPORT_PCRE2_32
6451 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6452#endif
6453
6454 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6455 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6456 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6457
6458 length = sizeof(copybuffer)/code_unit_size;
6459 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6460 if (rc < 0)
6461 {
6462 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6463 if (!print_error_message(rc, "", "\n")) return FALSE;
6464 }
6465 else
6466 {
6467 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6468 if (rc < 0)
6469 {
6470 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6471 if (!print_error_message(rc, "", "\n")) return FALSE;
6472 }
6473 else if (length2 != length)
6474 {
6475 fprintf(outfile, "Mismatched substring lengths: %"
Elliott Hughes16619d62021-10-29 12:10:38 -07006476 SIZ_FORM " %" SIZ_FORM "\n", length, length2);
Elliott Hughes5b808042021-10-01 10:56:10 -07006477 }
6478 fprintf(outfile, " C ");
6479 PCHARSV(copybuffer, 0, length, utf, outfile);
Elliott Hughes16619d62021-10-29 12:10:38 -07006480 fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
Elliott Hughes5b808042021-10-01 10:56:10 -07006481 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6482 else fprintf(outfile, " (non-unique)\n");
6483 }
6484 nptr += namelen + 1;
6485 }
6486
6487/* Test get strings by number */
6488
6489for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6490 {
6491 int rc;
6492 PCRE2_SIZE length;
6493 void *gotbuffer;
6494 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6495 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6496 if (rc < 0)
6497 {
6498 fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6499 if (!print_error_message(rc, "", "\n")) return FALSE;
6500 }
6501 else
6502 {
6503 fprintf(outfile, "%2dG ", n);
6504 PCHARSV(gotbuffer, 0, length, utf, outfile);
Elliott Hughes16619d62021-10-29 12:10:38 -07006505 fprintf(outfile, " (%" SIZ_FORM ")\n", length);
Elliott Hughes5b808042021-10-01 10:56:10 -07006506 PCRE2_SUBSTRING_FREE(gotbuffer);
6507 }
6508 }
6509
6510/* Test get strings by name */
6511
6512nptr = dat_datctl.get_names;
6513for (;;)
6514 {
6515 PCRE2_SIZE length;
6516 void *gotbuffer;
6517 int rc;
6518 int groupnumber;
6519 int namelen = strlen((const char *)nptr);
6520#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6521 PCRE2_SIZE cnl = namelen;
6522#endif
6523 if (namelen == 0) break;
6524
6525#ifdef SUPPORT_PCRE2_8
6526 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6527#endif
6528#ifdef SUPPORT_PCRE2_16
6529 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6530#endif
6531#ifdef SUPPORT_PCRE2_32
6532 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6533#endif
6534
6535 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6536 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6537 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6538
6539 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6540 if (rc < 0)
6541 {
6542 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6543 if (!print_error_message(rc, "", "\n")) return FALSE;
6544 }
6545 else
6546 {
6547 fprintf(outfile, " G ");
6548 PCHARSV(gotbuffer, 0, length, utf, outfile);
Elliott Hughes16619d62021-10-29 12:10:38 -07006549 fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
Elliott Hughes5b808042021-10-01 10:56:10 -07006550 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6551 else fprintf(outfile, " (non-unique)\n");
6552 PCRE2_SUBSTRING_FREE(gotbuffer);
6553 }
6554 nptr += namelen + 1;
6555 }
6556
6557/* Test getting the complete list of captured strings. */
6558
6559if ((dat_datctl.control & CTL_GETALL) != 0)
6560 {
6561 int rc;
6562 void **stringlist;
6563 PCRE2_SIZE *lengths;
6564 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6565 if (rc < 0)
6566 {
6567 fprintf(outfile, "get substring list failed (%d): ", rc);
6568 if (!print_error_message(rc, "", "\n")) return FALSE;
6569 }
6570 else
6571 {
6572 for (i = 0; i < capcount; i++)
6573 {
6574 fprintf(outfile, "%2dL ", i);
6575 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6576 putc('\n', outfile);
6577 }
6578 if (stringlist[i] != NULL)
6579 fprintf(outfile, "string list not terminated by NULL\n");
6580 PCRE2_SUBSTRING_LIST_FREE(stringlist);
6581 }
6582 }
6583
6584return TRUE;
6585}
6586
6587
6588
6589/*************************************************
6590* Show an entire ovector *
6591*************************************************/
6592
6593/* This function is called after partial matching or match failure, when the
6594"allvector" modifier is set. It is a means of checking the contents of the
6595entire ovector, to ensure no modification of fields that should be unchanged.
6596
6597Arguments:
6598 ovector points to the ovector
6599 oveccount number of pairs
6600
6601Returns: nothing
6602*/
6603
6604static void
6605show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
6606{
6607uint32_t i;
6608for (i = 0; i < 2*oveccount; i += 2)
6609 {
6610 PCRE2_SIZE start = ovector[i];
6611 PCRE2_SIZE end = ovector[i+1];
6612
6613 fprintf(outfile, "%2d: ", i/2);
6614 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
6615 fprintf(outfile, "<unset>\n");
6616 else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
6617 fprintf(outfile, "<unchanged>\n");
6618 else
6619 fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
6620 (unsigned long int)end);
6621 }
6622}
6623
6624
6625/*************************************************
6626* Process a data line *
6627*************************************************/
6628
6629/* The line is in buffer; it will not be empty.
6630
6631Arguments: none
6632
6633Returns: PR_OK continue processing next line
6634 PR_SKIP skip to a blank line
6635 PR_ABEND abort the pcre2test run
6636*/
6637
6638static int
6639process_data(void)
6640{
6641PCRE2_SIZE len, ulen, arg_ulen;
6642uint32_t gmatched;
6643uint32_t c, k;
6644uint32_t g_notempty = 0;
6645uint8_t *p, *pp, *start_rep;
6646size_t needlen;
6647void *use_dat_context;
6648BOOL utf;
6649BOOL subject_literal;
6650
6651PCRE2_SIZE *ovector;
6652PCRE2_SIZE ovecsave[3];
6653uint32_t oveccount;
6654
6655#ifdef SUPPORT_PCRE2_8
6656uint8_t *q8 = NULL;
6657#endif
6658#ifdef SUPPORT_PCRE2_16
6659uint16_t *q16 = NULL;
6660#endif
6661#ifdef SUPPORT_PCRE2_32
6662uint32_t *q32 = NULL;
6663#endif
6664
6665subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6666
6667/* Copy the default context and data control blocks to the active ones. Then
6668copy from the pattern the controls that can be set in either the pattern or the
6669data. This allows them to be overridden in the data line. We do not do this for
6670options because those that are common apply separately to compiling and
6671matching. */
6672
6673DATCTXCPY(dat_context, default_dat_context);
6674memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6675dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6676dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6677strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6678if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6679
6680if (dat_datctl.substitute_skip == 0)
6681 dat_datctl.substitute_skip = pat_patctl.substitute_skip;
6682if (dat_datctl.substitute_stop == 0)
6683 dat_datctl.substitute_stop = pat_patctl.substitute_stop;
6684
6685/* Initialize for scanning the data line. */
6686
6687#ifdef SUPPORT_PCRE2_8
6688utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6689 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6690 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6691#else
6692utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6693#endif
6694
6695start_rep = NULL;
6696len = strlen((const char *)buffer);
6697while (len > 0 && isspace(buffer[len-1])) len--;
6698buffer[len] = 0;
6699p = buffer;
6700while (isspace(*p)) p++;
6701
6702/* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6703invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6704
6705if (utf)
6706 {
6707 uint8_t *q;
6708 uint32_t cc;
6709 int n = 1;
6710 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6711 if (n <= 0)
6712 {
6713 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6714 "in UTF mode\n");
6715 return PR_OK;
6716 }
6717 }
6718
6719#ifdef SUPPORT_VALGRIND
6720/* Mark the dbuffer as addressable but undefined again. */
6721if (dbuffer != NULL)
6722 {
6723 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6724 }
6725#endif
6726
6727/* Allocate a buffer to hold the data line; len+1 is an upper bound on
6728the number of code units that will be needed (though the buffer may have to be
6729extended if replication is involved). */
6730
6731needlen = (size_t)((len+1) * code_unit_size);
6732if (dbuffer == NULL || needlen >= dbuffer_size)
6733 {
6734 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6735 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6736 if (dbuffer == NULL)
6737 {
6738 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6739 exit(1);
6740 }
6741 }
6742SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */
6743
6744/* Scan the data line, interpreting data escapes, and put the result into a
6745buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6746in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6747*/
6748
6749while ((c = *p++) != 0)
6750 {
6751 int32_t i = 0;
6752 size_t replen;
6753
6754 /* ] may mark the end of a replicated sequence */
6755
6756 if (c == ']' && start_rep != NULL)
6757 {
6758 long li;
6759 char *endptr;
6760 size_t qoffset = CAST8VAR(q) - dbuffer;
6761 size_t rep_offset = start_rep - dbuffer;
6762
6763 if (*p++ != '{')
6764 {
6765 fprintf(outfile, "** Expected '{' after \\[....]\n");
6766 return PR_OK;
6767 }
6768
6769 li = strtol((const char *)p, &endptr, 10);
6770 if (S32OVERFLOW(li))
6771 {
6772 fprintf(outfile, "** Repeat count too large\n");
6773 return PR_OK;
6774 }
6775
6776 p = (uint8_t *)endptr;
6777 if (*p++ != '}')
6778 {
6779 fprintf(outfile, "** Expected '}' after \\[...]{...\n");
6780 return PR_OK;
6781 }
6782
6783 i = (int32_t)li;
6784 if (i-- == 0)
6785 {
6786 fprintf(outfile, "** Zero repeat not allowed\n");
6787 return PR_OK;
6788 }
6789
6790 replen = CAST8VAR(q) - start_rep;
6791 needlen += replen * i;
6792
6793 if (needlen >= dbuffer_size)
6794 {
6795 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6796 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6797 if (dbuffer == NULL)
6798 {
6799 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6800 exit(1);
6801 }
6802 SETCASTPTR(q, dbuffer + qoffset);
6803 start_rep = dbuffer + rep_offset;
6804 }
6805
6806 while (i-- > 0)
6807 {
6808 memcpy(CAST8VAR(q), start_rep, replen);
6809 SETPLUS(q, replen/code_unit_size);
6810 }
6811
6812 start_rep = NULL;
6813 continue;
6814 }
6815
6816 /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
6817 set, do the fudge for setting the top bit. */
6818
6819 if (c != '\\' || subject_literal)
6820 {
6821 uint32_t topbit = 0;
6822 if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
6823 {
6824 topbit = 0x80000000;
6825 c = *p++;
6826 }
6827 if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
6828 HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
6829 c |= topbit;
6830 }
6831
6832 /* Handle backslash escapes */
6833
6834 else switch ((c = *p++))
6835 {
6836 case '\\': break;
6837 case 'a': c = CHAR_BEL; break;
6838 case 'b': c = '\b'; break;
6839 case 'e': c = CHAR_ESC; break;
6840 case 'f': c = '\f'; break;
6841 case 'n': c = '\n'; break;
6842 case 'r': c = '\r'; break;
6843 case 't': c = '\t'; break;
6844 case 'v': c = '\v'; break;
6845
6846 case '0': case '1': case '2': case '3':
6847 case '4': case '5': case '6': case '7':
6848 c -= '0';
6849 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
6850 c = c * 8 + *p++ - '0';
6851 break;
6852
6853 case 'o':
6854 if (*p == '{')
6855 {
6856 uint8_t *pt = p;
6857 c = 0;
6858 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
6859 {
6860 if (++i == 12)
6861 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
6862 "using only the first twelve.\n");
6863 else c = c * 8 + *pt - '0';
6864 }
6865 if (*pt == '}') p = pt + 1;
6866 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
6867 }
6868 break;
6869
6870 case 'x':
6871 if (*p == '{')
6872 {
6873 uint8_t *pt = p;
6874 c = 0;
6875
6876 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
6877 when isxdigit() is a macro that refers to its argument more than
6878 once. This is banned by the C Standard, but apparently happens in at
6879 least one MacOS environment. */
6880
6881 for (pt++; isxdigit(*pt); pt++)
6882 {
6883 if (++i == 9)
6884 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
6885 "using only the first eight.\n");
6886 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
6887 }
6888 if (*pt == '}')
6889 {
6890 p = pt + 1;
6891 break;
6892 }
6893 /* Not correct form for \x{...}; fall through */
6894 }
6895
6896 /* \x without {} always defines just one byte in 8-bit mode. This
6897 allows UTF-8 characters to be constructed byte by byte, and also allows
6898 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
6899 Otherwise, pass it down as data. */
6900
6901 c = 0;
6902 while (i++ < 2 && isxdigit(*p))
6903 {
6904 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
6905 p++;
6906 }
6907#if defined SUPPORT_PCRE2_8
6908 if (utf && (test_mode == PCRE8_MODE))
6909 {
6910 *q8++ = c;
6911 continue;
6912 }
6913#endif
6914 break;
6915
6916 case 0: /* \ followed by EOF allows for an empty line */
6917 p--;
6918 continue;
6919
6920 case '=': /* \= terminates the data, starts modifiers */
6921 goto ENDSTRING;
6922
6923 case '[': /* \[ introduces a replicated character sequence */
6924 if (start_rep != NULL)
6925 {
6926 fprintf(outfile, "** Nested replication is not supported\n");
6927 return PR_OK;
6928 }
6929 start_rep = CAST8VAR(q);
6930 continue;
6931
6932 default:
6933 if (isalnum(c))
6934 {
6935 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6936 return PR_OK;
6937 }
6938 }
6939
6940 /* We now have a character value in c that may be greater than 255.
6941 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6942 than 127 in UTF mode must have come from \x{...} or octal constructs
6943 because values from \x.. get this far only in non-UTF mode. */
6944
6945#ifdef SUPPORT_PCRE2_8
6946 if (test_mode == PCRE8_MODE)
6947 {
6948 if (utf)
6949 {
6950 if (c > 0x7fffffff)
6951 {
6952 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6953 "and so cannot be converted to UTF-8\n", c);
6954 return PR_OK;
6955 }
6956 q8 += ord2utf8(c, q8);
6957 }
6958 else
6959 {
6960 if (c > 0xffu)
6961 {
6962 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6963 "and UTF-8 mode is not enabled.\n", c);
6964 fprintf(outfile, "** Truncation will probably give the wrong "
6965 "result.\n");
6966 }
6967 *q8++ = (uint8_t)c;
6968 }
6969 }
6970#endif
6971#ifdef SUPPORT_PCRE2_16
6972 if (test_mode == PCRE16_MODE)
6973 {
6974 if (utf)
6975 {
6976 if (c > 0x10ffffu)
6977 {
6978 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6979 "0x10ffff and so cannot be converted to UTF-16\n", c);
6980 return PR_OK;
6981 }
6982 else if (c >= 0x10000u)
6983 {
6984 c-= 0x10000u;
6985 *q16++ = 0xD800 | (c >> 10);
6986 *q16++ = 0xDC00 | (c & 0x3ff);
6987 }
6988 else
6989 *q16++ = c;
6990 }
6991 else
6992 {
6993 if (c > 0xffffu)
6994 {
6995 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6996 "and UTF-16 mode is not enabled.\n", c);
6997 fprintf(outfile, "** Truncation will probably give the wrong "
6998 "result.\n");
6999 }
7000
7001 *q16++ = (uint16_t)c;
7002 }
7003 }
7004#endif
7005#ifdef SUPPORT_PCRE2_32
7006 if (test_mode == PCRE32_MODE)
7007 {
7008 *q32++ = c;
7009 }
7010#endif
7011 }
7012
7013ENDSTRING:
7014SET(*q, 0);
7015len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
7016ulen = len/code_unit_size; /* Length in code units */
7017arg_ulen = ulen; /* Value to use in match arg */
7018
7019/* If the string was terminated by \= we must now interpret modifiers. */
7020
7021if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
7022 return PR_OK;
7023
7024/* Setting substitute_{skip,stop} implies a substitute callout. */
7025
7026if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
7027 dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
7028
7029/* Check for mutually exclusive modifiers. At present, these are all in the
7030first control word. */
7031
7032for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
7033 {
7034 c = dat_datctl.control & exclusive_dat_controls[k];
7035 if (c != 0 && c != (c & (~c+1)))
7036 {
7037 show_controls(c, 0, "** Not allowed together:");
7038 fprintf(outfile, "\n");
7039 return PR_OK;
7040 }
7041 }
7042
7043if (pat_patctl.replacement[0] != 0)
7044 {
7045 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
7046 (dat_datctl.control & CTL_NULLCONTEXT) != 0)
7047 {
7048 fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
7049 return PR_OK;
7050 }
7051
7052 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7053 fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
7054 }
7055
7056/* Warn for modifiers that are ignored for DFA. */
7057
7058if ((dat_datctl.control & CTL_DFA) != 0)
7059 {
7060 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7061 fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
7062 }
7063
7064/* We now have the subject in dbuffer, with len containing the byte length, and
7065ulen containing the code unit length, with a copy in arg_ulen for use in match
7066function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
7067zero_terminate modifier is present).
7068
7069Move the data to the end of the buffer so that a read over the end can be
7070caught by valgrind or other means. If we have explicit valgrind support, mark
7071the unused start of the buffer unaddressable. If we are using the POSIX
7072interface, or testing zero-termination, we must include the terminating zero in
7073the usable data. */
7074
7075c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
7076 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
7077pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
7078#ifdef SUPPORT_VALGRIND
7079 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
7080#endif
7081
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007082/* Now pp points to the subject string, but if null_subject was specified, set
7083it to NULL to test PCRE2's behaviour. */
7084
7085if ((dat_datctl.control2 & CTL2_NULL_SUBJECT) != 0) pp = NULL;
7086
7087/* POSIX matching is only possible in 8-bit mode, and it does not support
7088timing or other fancy features. Some were checked at compile time, but we need
7089to check the match-time settings here. */
Elliott Hughes5b808042021-10-01 10:56:10 -07007090
7091#ifdef SUPPORT_PCRE2_8
7092if ((pat_patctl.control & CTL_POSIX) != 0)
7093 {
7094 int rc;
7095 int eflags = 0;
7096 regmatch_t *pmatch = NULL;
7097 const char *msg = "** Ignored with POSIX interface:";
7098
7099 if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
7100 prmsg(&msg, "callout_error");
7101 if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
7102 prmsg(&msg, "callout_fail");
7103 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
7104 prmsg(&msg, "copy");
7105 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
7106 prmsg(&msg, "get");
7107 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
7108 if (dat_datctl.offset != 0) prmsg(&msg, "offset");
7109
7110 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
7111 {
7112 fprintf(outfile, "%s", msg);
7113 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
7114 msg = "";
7115 }
7116 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
7117 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
7118 {
7119 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
7120 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
7121 msg = "";
7122 }
7123
7124 if (msg[0] == 0) fprintf(outfile, "\n");
7125
7126 if (dat_datctl.oveccount > 0)
7127 {
7128 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
7129 if (pmatch == NULL)
7130 {
7131 fprintf(outfile, "** Failed to get memory for recording matching "
7132 "information (size set = %du)\n", dat_datctl.oveccount);
7133 return PR_OK;
7134 }
7135 }
7136
7137 if (dat_datctl.startend[0] != CFORE_UNSET)
7138 {
7139 pmatch[0].rm_so = dat_datctl.startend[0];
7140 pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
7141 dat_datctl.startend[1] : len;
7142 eflags |= REG_STARTEND;
7143 }
7144
7145 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
7146 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
7147 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
7148
7149 rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
7150 if (rc != 0)
7151 {
7152 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
7153 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
7154 }
7155 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
7156 fprintf(outfile, "Matched with REG_NOSUB\n");
7157 else if (dat_datctl.oveccount == 0)
7158 fprintf(outfile, "Matched without capture\n");
7159 else
7160 {
7161 size_t i, j;
7162 size_t last_printed = (size_t)dat_datctl.oveccount;
7163 for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
7164 {
7165 if (pmatch[i].rm_so >= 0)
7166 {
7167 PCRE2_SIZE start = pmatch[i].rm_so;
7168 PCRE2_SIZE end = pmatch[i].rm_eo;
7169 for (j = last_printed + 1; j < i; j++)
7170 fprintf(outfile, "%2d: <unset>\n", (int)j);
7171 last_printed = i;
7172 if (start > end)
7173 {
7174 start = pmatch[i].rm_eo;
7175 end = pmatch[i].rm_so;
7176 fprintf(outfile, "Start of matched string is beyond its end - "
7177 "displaying from end to start.\n");
7178 }
7179 fprintf(outfile, "%2d: ", (int)i);
7180 PCHARSV(pp, start, end - start, utf, outfile);
7181 fprintf(outfile, "\n");
7182
7183 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
7184 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
7185 {
7186 fprintf(outfile, "%2d+ ", (int)i);
7187 /* Note: don't use the start/end variables here because we want to
7188 show the text from what is reported as the end. */
7189 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
7190 fprintf(outfile, "\n"); }
7191 }
7192 }
7193 }
7194 free(pmatch);
7195 return PR_OK;
7196 }
7197#endif /* SUPPORT_PCRE2_8 */
7198
7199 /* Handle matching via the native interface. Check for consistency of
7200modifiers. */
7201
7202if (dat_datctl.startend[0] != CFORE_UNSET)
7203 fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
7204
7205/* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
7206matching, even if the JIT compiler was used. */
7207
7208if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
7209 FLD(compiled_code, executable_jit) != NULL)
7210 {
7211 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
7212 dat_datctl.control &= ~CTL_ALLUSEDTEXT;
7213 }
7214
7215/* Handle passing the subject as zero-terminated. */
7216
7217if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7218 arg_ulen = PCRE2_ZERO_TERMINATED;
7219
7220/* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
7221NULL context. */
7222
7223use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
7224 NULL : PTR(dat_context);
7225
7226/* Enable display of malloc/free if wanted. We can do this only if either the
7227pattern or the subject is processed with a context. */
7228
7229show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
7230
7231if (show_memory &&
7232 (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
7233 fprintf(outfile, "** \\=memory requires either a pattern or a subject "
7234 "context: ignored\n");
7235
7236/* Create and assign a JIT stack if requested. */
7237
7238if (dat_datctl.jitstack != 0)
7239 {
7240 if (dat_datctl.jitstack != jit_stack_size)
7241 {
7242 PCRE2_JIT_STACK_FREE(jit_stack);
7243 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
7244 jit_stack_size = dat_datctl.jitstack;
7245 }
7246 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
7247 }
7248
7249/* Or de-assign */
7250
7251else if (jit_stack != NULL)
7252 {
7253 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
7254 PCRE2_JIT_STACK_FREE(jit_stack);
7255 jit_stack = NULL;
7256 jit_stack_size = 0;
7257 }
7258
7259/* When no JIT stack is assigned, we must ensure that there is a JIT callback
7260if we want to verify that JIT was actually used. */
7261
7262if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
7263 {
7264 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
7265 }
7266
7267/* Adjust match_data according to size of offsets required. A size of zero
7268causes a new match data block to be obtained that exactly fits the pattern. */
7269
7270if (dat_datctl.oveccount == 0)
7271 {
7272 PCRE2_MATCH_DATA_FREE(match_data);
7273 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
7274 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
7275 }
7276else if (dat_datctl.oveccount <= max_oveccount)
7277 {
7278 SETFLD(match_data, oveccount, dat_datctl.oveccount);
7279 }
7280else
7281 {
7282 max_oveccount = dat_datctl.oveccount;
7283 PCRE2_MATCH_DATA_FREE(match_data);
7284 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
7285 }
7286
7287if (CASTVAR(void *, match_data) == NULL)
7288 {
7289 fprintf(outfile, "** Failed to get memory for recording matching "
7290 "information (size requested: %d)\n", dat_datctl.oveccount);
7291 max_oveccount = 0;
7292 return PR_OK;
7293 }
7294
7295ovector = FLD(match_data, ovector);
7296PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7297
7298/* Replacement processing is ignored for DFA matching. */
7299
7300if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
7301 {
7302 fprintf(outfile, "** Ignored for DFA matching: replace\n");
7303 dat_datctl.replacement[0] = 0;
7304 }
7305
7306/* If a replacement string is provided, call pcre2_substitute() instead of one
7307of the matching functions. First we have to convert the replacement string to
7308the appropriate width. */
7309
7310if (dat_datctl.replacement[0] != 0)
7311 {
7312 int rc;
7313 uint8_t *pr;
7314 uint8_t rbuffer[REPLACE_BUFFSIZE];
7315 uint8_t nbuffer[REPLACE_BUFFSIZE];
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007316 uint8_t *rbptr;
Elliott Hughes5b808042021-10-01 10:56:10 -07007317 uint32_t xoptions;
7318 uint32_t emoption; /* External match option */
7319 PCRE2_SIZE j, rlen, nsize, erroroffset;
7320 BOOL badutf = FALSE;
7321
7322#ifdef SUPPORT_PCRE2_8
7323 uint8_t *r8 = NULL;
7324#endif
7325#ifdef SUPPORT_PCRE2_16
7326 uint16_t *r16 = NULL;
7327#endif
7328#ifdef SUPPORT_PCRE2_32
7329 uint32_t *r32 = NULL;
7330#endif
7331
7332 /* Fill the ovector with junk to detect elements that do not get set
7333 when they should be (relevant only when "allvector" is specified). */
7334
7335 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7336
7337 if (timeitm)
7338 fprintf(outfile, "** Timing is not supported with replace: ignored\n");
7339
7340 if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
7341 fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
7342
7343 /* Check for a test that does substitution after an initial external match.
7344 If this is set, we run the external match, but leave the interpretation of
7345 its output to pcre2_substitute(). */
7346
7347 emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 :
7348 PCRE2_SUBSTITUTE_MATCHED;
7349
7350 if (emoption != 0)
7351 {
7352 PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7353 dat_datctl.options, match_data, use_dat_context);
7354 }
7355
7356 xoptions = emoption |
7357 (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
7358 PCRE2_SUBSTITUTE_GLOBAL) |
7359 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
7360 PCRE2_SUBSTITUTE_EXTENDED) |
7361 (((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
7362 PCRE2_SUBSTITUTE_LITERAL) |
7363 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7364 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7365 (((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 :
7366 PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) |
7367 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7368 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7369 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7370 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7371
7372 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
7373 pr = dat_datctl.replacement;
7374
7375 /* If the replacement starts with '[<number>]' we interpret that as length
7376 value for the replacement buffer. */
7377
7378 nsize = REPLACE_BUFFSIZE/code_unit_size;
7379 if (*pr == '[')
7380 {
7381 PCRE2_SIZE n = 0;
7382 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7383 if (*pr++ != ']')
7384 {
7385 fprintf(outfile, "Bad buffer size in replacement string\n");
7386 return PR_OK;
7387 }
7388 if (n > nsize)
7389 {
7390 fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
Elliott Hughes16619d62021-10-29 12:10:38 -07007391 "large (max %" SIZ_FORM ")\n", n, nsize);
Elliott Hughes5b808042021-10-01 10:56:10 -07007392 return PR_OK;
7393 }
7394 nsize = n;
7395 }
7396
7397 /* Now copy the replacement string to a buffer of the appropriate width. No
7398 escape processing is done for replacements. In UTF mode, check for an invalid
7399 UTF-8 input string, and if it is invalid, just copy its code units without
7400 UTF interpretation. This provides a means of checking that an invalid string
7401 is detected. Otherwise, UTF-8 can be used to include wide characters in a
7402 replacement. */
7403
7404 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7405
7406 /* Not UTF or invalid UTF-8: just copy the code units. */
7407
7408 if (!utf || badutf)
7409 {
7410 while ((c = *pr++) != 0)
7411 {
7412#ifdef SUPPORT_PCRE2_8
7413 if (test_mode == PCRE8_MODE) *r8++ = c;
7414#endif
7415#ifdef SUPPORT_PCRE2_16
7416 if (test_mode == PCRE16_MODE) *r16++ = c;
7417#endif
7418#ifdef SUPPORT_PCRE2_32
7419 if (test_mode == PCRE32_MODE) *r32++ = c;
7420#endif
7421 }
7422 }
7423
7424 /* Valid UTF-8 replacement string */
7425
7426 else while ((c = *pr++) != 0)
7427 {
7428 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7429
7430#ifdef SUPPORT_PCRE2_8
7431 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7432#endif
7433
7434#ifdef SUPPORT_PCRE2_16
7435 if (test_mode == PCRE16_MODE)
7436 {
7437 if (c >= 0x10000u)
7438 {
7439 c-= 0x10000u;
7440 *r16++ = 0xD800 | (c >> 10);
7441 *r16++ = 0xDC00 | (c & 0x3ff);
7442 }
7443 else *r16++ = c;
7444 }
7445#endif
7446
7447#ifdef SUPPORT_PCRE2_32
7448 if (test_mode == PCRE32_MODE) *r32++ = c;
7449#endif
7450 }
7451
7452 SET(*r, 0);
7453 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7454 rlen = PCRE2_ZERO_TERMINATED;
7455 else
7456 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7457
7458 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
7459 {
7460 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
7461 }
7462 else
7463 {
7464 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
7465 }
7466
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007467 /* There is a special option to set the replacement to NULL in order to test
7468 that case. */
7469
7470 rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL;
7471
Elliott Hughes5b808042021-10-01 10:56:10 -07007472 PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7473 dat_datctl.options|xoptions, match_data, use_dat_context,
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007474 rbptr, rlen, nbuffer, &nsize);
Elliott Hughes5b808042021-10-01 10:56:10 -07007475
7476 if (rc < 0)
7477 {
7478 fprintf(outfile, "Failed: error %d", rc);
7479 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7480 fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7481 fprintf(outfile, ": ");
7482 if (!print_error_message(rc, "", "")) return PR_ABEND;
7483 if (rc == PCRE2_ERROR_NOMEMORY &&
7484 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7485 fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7486 }
7487 else
7488 {
7489 fprintf(outfile, "%2d: ", rc);
7490 PCHARSV(nbuffer, 0, nsize, utf, outfile);
7491 }
7492
7493 fprintf(outfile, "\n");
7494 show_memory = FALSE;
7495
7496 /* Show final ovector contents if requested. */
7497
7498 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7499 show_ovector(ovector, oveccount);
7500
7501 return PR_OK;
7502 } /* End of substitution handling */
7503
7504/* When a replacement string is not provided, run a loop for global matching
7505with one of the basic matching functions. For altglobal (or first time round
7506the loop), set an "unset" value for the previous match info. */
7507
7508ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7509
7510for (gmatched = 0;; gmatched++)
7511 {
7512 PCRE2_SIZE j;
7513 int capcount;
7514
7515 /* Fill the ovector with junk to detect elements that do not get set
7516 when they should be. */
7517
7518 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7519
7520 /* When matching is via pcre2_match(), we will detect the use of JIT via the
7521 stack callback function. */
7522
7523 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7524
7525 /* Do timing if required. */
7526
7527 if (timeitm > 0)
7528 {
7529 int i;
7530 clock_t start_time, time_taken;
7531
7532 if ((dat_datctl.control & CTL_DFA) != 0)
7533 {
7534 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7535 {
7536 fprintf(outfile, "Timing DFA restarts is not supported\n");
7537 return PR_OK;
7538 }
7539 if (dfa_workspace == NULL)
7540 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7541 start_time = clock();
7542 for (i = 0; i < timeitm; i++)
7543 {
7544 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7545 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7546 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7547 }
7548 }
7549
7550 else if ((pat_patctl.control & CTL_JITFAST) != 0)
7551 {
7552 start_time = clock();
7553 for (i = 0; i < timeitm; i++)
7554 {
7555 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7556 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7557 use_dat_context);
7558 }
7559 }
7560
7561 else
7562 {
7563 start_time = clock();
7564 for (i = 0; i < timeitm; i++)
7565 {
7566 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7567 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7568 use_dat_context);
7569 }
7570 }
7571 total_match_time += (time_taken = clock() - start_time);
7572 fprintf(outfile, "Match time %.4f milliseconds\n",
7573 (((double)time_taken * 1000.0) / (double)timeitm) /
7574 (double)CLOCKS_PER_SEC);
7575 }
7576
7577 /* Find the heap, match and depth limits if requested. The depth and heap
7578 limits are not relevant for JIT. The return from check_match_limit() is the
7579 return from the final call to pcre2_match() or pcre2_dfa_match(). */
7580
7581 if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
7582 {
7583 capcount = 0; /* This stops compiler warnings */
7584
7585 if (FLD(compiled_code, executable_jit) == NULL ||
7586 (dat_datctl.options & PCRE2_NO_JIT) != 0)
7587 {
7588 (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7589 }
7590
7591 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7592 "match");
7593
7594 if (FLD(compiled_code, executable_jit) == NULL ||
7595 (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7596 (dat_datctl.control & CTL_DFA) != 0)
7597 {
7598 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7599 "depth");
7600 }
7601
7602 if (capcount == 0)
7603 {
7604 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7605 capcount = dat_datctl.oveccount;
7606 }
7607 }
7608
7609 /* Otherwise just run a single match, setting up a callout if required (the
7610 default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7611
7612 else
7613 {
7614 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7615 {
7616 PCRE2_SET_CALLOUT(dat_context, callout_function,
7617 (void *)(&dat_datctl.callout_data));
7618 first_callout = TRUE;
7619 last_callout_mark = NULL;
7620 callout_count = 0;
7621 }
7622 else
7623 {
7624 PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
7625 }
7626
7627 /* Run a single DFA or NFA match. */
7628
7629 if ((dat_datctl.control & CTL_DFA) != 0)
7630 {
7631 if (dfa_workspace == NULL)
7632 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7633 if (dfa_matched++ == 0)
7634 dfa_workspace[0] = -1; /* To catch bad restart */
7635 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7636 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7637 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7638 if (capcount == 0)
7639 {
7640 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7641 capcount = dat_datctl.oveccount;
7642 }
7643 }
7644 else
7645 {
7646 if ((pat_patctl.control & CTL_JITFAST) != 0)
7647 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7648 dat_datctl.options | g_notempty, match_data, use_dat_context);
7649 else
7650 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7651 dat_datctl.options | g_notempty, match_data, use_dat_context);
7652 if (capcount == 0)
7653 {
7654 fprintf(outfile, "Matched, but too many substrings\n");
7655 capcount = dat_datctl.oveccount;
7656 }
7657 }
7658 }
7659
7660 /* The result of the match is now in capcount. First handle a successful
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007661 match. If pp was forced to be NULL (to test NULL handling) it will have been
7662 treated as an empty string if the length was zero. So re-create that for
7663 outputting. */
Elliott Hughes5b808042021-10-01 10:56:10 -07007664
7665 if (capcount >= 0)
7666 {
7667 int i;
7668
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007669 if (pp == NULL) pp = (uint8_t *)"";
7670
Elliott Hughes5b808042021-10-01 10:56:10 -07007671 if (capcount > (int)oveccount) /* Check for lunatic return value */
7672 {
7673 fprintf(outfile,
7674 "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7675 capcount, oveccount);
7676 capcount = oveccount;
7677 if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7678 {
7679 fprintf(outfile, "** Global loop abandoned\n");
7680 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7681 }
7682 }
7683
7684 /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
7685 should be, but not for fast JIT, where it isn't supported. */
7686
7687 if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
7688 (pat_patctl.control & CTL_JITFAST) == 0)
7689 {
7690 if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
7691 fprintf(outfile,
7692 "** PCRE2 error: flag not set after copy_matched_subject\n");
7693
7694 if (CASTFLD(void *, match_data, subject) == pp)
7695 fprintf(outfile,
7696 "** PCRE2 error: copy_matched_subject has not copied\n");
7697
7698 if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
7699 fprintf(outfile,
7700 "** PCRE2 error: copy_matched_subject mismatch\n");
7701 }
7702
7703 /* If this is not the first time round a global loop, check that the
7704 returned string has changed. If it has not, check for an empty string match
7705 at different starting offset from the previous match. This is a failed test
7706 retry for null-matching patterns that don't match at their starting offset,
7707 for example /(?<=\G.)/. A repeated match at the same point is not such a
7708 pattern, and must be discarded, and we then proceed to seek a non-null
7709 match at the current point. For any other repeated match, there is a bug
7710 somewhere and we must break the loop because it will go on for ever. We
7711 know that there are always at least two elements in the ovector. */
7712
7713 if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7714 {
7715 if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7716 {
7717 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7718 ovecsave[2] = dat_datctl.offset;
7719 continue; /* Back to the top of the loop */
7720 }
7721 fprintf(outfile,
7722 "** PCRE2 error: global repeat returned the same string as previous\n");
7723 fprintf(outfile, "** Global loop abandoned\n");
7724 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7725 }
7726
7727 /* "allcaptures" requests showing of all captures in the pattern, to check
7728 unset ones at the end. It may be set on the pattern or the data. Implement
7729 by setting capcount to the maximum. This is not relevant for DFA matching,
7730 so ignore it (warning given above). */
7731
7732 if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
7733 {
7734 capcount = maxcapcount + 1; /* Allow for full match */
7735 if (capcount > (int)oveccount) capcount = oveccount;
7736 }
7737
7738 /* "allvector" request showing the entire ovector. */
7739
7740 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
7741
7742 /* Output the captured substrings. Note that, for the matched string,
7743 the use of \K in an assertion can make the start later than the end. */
7744
7745 for (i = 0; i < 2*capcount; i += 2)
7746 {
7747 PCRE2_SIZE lleft, lmiddle, lright;
7748 PCRE2_SIZE start = ovector[i];
7749 PCRE2_SIZE end = ovector[i+1];
7750
7751 if (start > end)
7752 {
7753 start = ovector[i+1];
7754 end = ovector[i];
7755 fprintf(outfile, "Start of matched string is beyond its end - "
7756 "displaying from end to start.\n");
7757 }
7758
7759 fprintf(outfile, "%2d: ", i/2);
7760
7761 /* Check for an unset group */
7762
7763 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
7764 {
7765 fprintf(outfile, "<unset>\n");
7766 continue;
7767 }
7768
7769 /* Check for silly offsets, in particular, values that have not been
7770 set when they should have been. However, if we are past the end of the
7771 captures for this pattern ("allvector" causes this), or if we are DFA
7772 matching, it isn't an error if the entry is unchanged. */
7773
7774 if (start > ulen || end > ulen)
7775 {
7776 if (((dat_datctl.control & CTL_DFA) != 0 ||
7777 i >= (int)(2*maxcapcount + 2)) &&
7778 start == JUNK_OFFSET && end == JUNK_OFFSET)
7779 fprintf(outfile, "<unchanged>\n");
7780 else
7781 fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
7782 (unsigned long int)start, (unsigned long int)end);
7783 continue;
7784 }
7785
7786 /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
7787 JIT, it is disabled above, with a comment.) When the match is done by the
7788 interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
7789 set, and if the leftmost consulted character is before the start of the
7790 match or the rightmost consulted character is past the end of the match,
7791 we want to show all consulted characters for the main matched string, and
7792 indicate which were lookarounds. */
7793
7794 if (i == 0)
7795 {
7796 BOOL showallused;
7797 PCRE2_SIZE leftchar, rightchar;
7798
7799 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7800 {
7801 leftchar = FLD(match_data, leftchar);
7802 rightchar = FLD(match_data, rightchar);
7803 showallused = i == 0 && (leftchar < start || rightchar > end);
7804 }
7805 else showallused = FALSE;
7806
7807 if (showallused)
7808 {
7809 PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
7810 PCHARS(lmiddle, pp, start, end - start, utf, outfile);
7811 PCHARS(lright, pp, end, rightchar - end, utf, outfile);
7812 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7813 fprintf(outfile, " (JIT)");
7814 fprintf(outfile, "\n ");
7815 for (j = 0; j < lleft; j++) fprintf(outfile, "<");
7816 for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
7817 for (j = 0; j < lright; j++) fprintf(outfile, ">");
7818 }
7819
7820 /* When a pattern contains \K, the start of match position may be
7821 different to the start of the matched string. When this is the case,
7822 show it when requested. */
7823
7824 else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
7825 {
7826 PCRE2_SIZE startchar;
7827 PCRE2_GET_STARTCHAR(startchar, match_data);
7828 PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
7829 PCHARSV(pp, start, end - start, utf, outfile);
7830 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7831 fprintf(outfile, " (JIT)");
7832 if (startchar != start)
7833 {
7834 fprintf(outfile, "\n ");
7835 for (j = 0; j < lleft; j++) fprintf(outfile, "^");
7836 }
7837 }
7838
7839 /* Otherwise, just show the matched string. */
7840
7841 else
7842 {
7843 PCHARSV(pp, start, end - start, utf, outfile);
7844 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7845 fprintf(outfile, " (JIT)");
7846 }
7847 }
7848
7849 /* Not the main matched string. Just show it unadorned. */
7850
7851 else
7852 {
7853 PCHARSV(pp, start, end - start, utf, outfile);
7854 }
7855
7856 fprintf(outfile, "\n");
7857
7858 /* Note: don't use the start/end variables here because we want to
7859 show the text from what is reported as the end. */
7860
7861 if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
7862 (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
7863 {
7864 fprintf(outfile, "%2d+ ", i/2);
7865 PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
7866 fprintf(outfile, "\n");
7867 }
7868 }
7869
7870 /* Output (*MARK) data if requested */
7871
7872 if ((dat_datctl.control & CTL_MARK) != 0 &&
7873 TESTFLD(match_data, mark, !=, NULL))
7874 {
7875 fprintf(outfile, "MK: ");
7876 PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
7877 fprintf(outfile, "\n");
7878 }
7879
7880 /* Process copy/get strings */
7881
7882 if (!copy_and_get(utf, capcount)) return PR_ABEND;
7883
7884 } /* End of handling a successful match */
7885
7886 /* There was a partial match. The value of ovector[0] is the bumpalong point,
7887 that is, startchar, not any \K point that might have been passed. When JIT is
7888 not in use, "allusedtext" may be set, in which case we indicate the leftmost
7889 consulted character. */
7890
7891 else if (capcount == PCRE2_ERROR_PARTIAL)
7892 {
7893 PCRE2_SIZE leftchar;
7894 int backlength;
7895 int rubriclength = 0;
7896
7897 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7898 {
7899 leftchar = FLD(match_data, leftchar);
7900 }
7901 else leftchar = ovector[0];
7902
7903 fprintf(outfile, "Partial match");
7904 if ((dat_datctl.control & CTL_MARK) != 0 &&
7905 TESTFLD(match_data, mark, !=, NULL))
7906 {
7907 fprintf(outfile, ", mark=");
7908 PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf,
7909 outfile);
7910 rubriclength += 7;
7911 }
7912 fprintf(outfile, ": ");
7913 rubriclength += 15;
7914
7915 PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile);
7916 PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
7917
7918 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7919 fprintf(outfile, " (JIT)");
7920 fprintf(outfile, "\n");
7921
7922 if (backlength != 0)
7923 {
7924 int i;
7925 for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
7926 for (i = 0; i < backlength; i++) fprintf(outfile, "<");
7927 fprintf(outfile, "\n");
7928 }
7929
7930 if (ulen != ovector[1])
7931 fprintf(outfile, "** ovector[1] is not equal to the subject length: "
7932 "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
7933
7934 /* Process copy/get strings */
7935
7936 if (!copy_and_get(utf, 1)) return PR_ABEND;
7937
7938 /* "allvector" outputs the entire vector */
7939
7940 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7941 show_ovector(ovector, oveccount);
7942
7943 break; /* Out of the /g loop */
7944 } /* End of handling partial match */
7945
7946 /* Failed to match. If this is a /g or /G loop, we might previously have
7947 set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
7948 If that is the case, this is not necessarily the end. We want to advance the
7949 start offset, and continue. We won't be at the end of the string - that was
7950 checked before setting g_notempty. We achieve the effect by pretending that a
7951 single character was matched.
7952
7953 Complication arises in the case when the newline convention is "any", "crlf",
7954 or "anycrlf". If the previous match was at the end of a line terminated by
7955 CRLF, an advance of one character just passes the CR, whereas we should
7956 prefer the longer newline sequence, as does the code in pcre2_match().
7957
7958 Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
7959 character, not one byte. */
7960
7961 else if (g_notempty != 0) /* There was a previous null match */
7962 {
7963 uint16_t nl = FLD(compiled_code, newline_convention);
7964 PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */
7965 PCRE2_SIZE end_offset = start_offset + 1;
7966
7967 if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
7968 nl == PCRE2_NEWLINE_ANYCRLF) &&
7969 start_offset < ulen - 1 &&
7970 CODE_UNIT(pp, start_offset) == '\r' &&
7971 CODE_UNIT(pp, end_offset) == '\n')
7972 end_offset++;
7973
7974 else if (utf && test_mode != PCRE32_MODE)
7975 {
7976 if (test_mode == PCRE8_MODE)
7977 {
7978 for (; end_offset < ulen; end_offset++)
7979 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7980 }
7981 else /* 16-bit mode */
7982 {
7983 for (; end_offset < ulen; end_offset++)
7984 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7985 }
7986 }
7987
7988 SETFLDVEC(match_data, ovector, 0, start_offset);
7989 SETFLDVEC(match_data, ovector, 1, end_offset);
7990 } /* End of handling null match in a global loop */
7991
7992 /* A "normal" match failure. There will be a negative error number in
7993 capcount. */
7994
7995 else
7996 {
7997 switch(capcount)
7998 {
7999 case PCRE2_ERROR_NOMATCH:
8000 if (gmatched == 0)
8001 {
8002 fprintf(outfile, "No match");
8003 if ((dat_datctl.control & CTL_MARK) != 0 &&
8004 TESTFLD(match_data, mark, !=, NULL))
8005 {
8006 fprintf(outfile, ", mark = ");
8007 PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
8008 }
8009 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8010 fprintf(outfile, " (JIT)");
8011 fprintf(outfile, "\n");
8012
8013 /* "allvector" outputs the entire vector */
8014
8015 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
8016 show_ovector(ovector, oveccount);
8017 }
8018 break;
8019
8020 case PCRE2_ERROR_BADUTFOFFSET:
8021 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
8022 break;
8023
8024 default:
8025 fprintf(outfile, "Failed: error %d: ", capcount);
8026 if (!print_error_message(capcount, "", "")) return PR_ABEND;
8027 if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
8028 capcount >= PCRE2_ERROR_UTF32_ERR2)
8029 {
8030 PCRE2_SIZE startchar;
8031 PCRE2_GET_STARTCHAR(startchar, match_data);
Elliott Hughes16619d62021-10-29 12:10:38 -07008032 fprintf(outfile, " at offset %" SIZ_FORM, startchar);
Elliott Hughes5b808042021-10-01 10:56:10 -07008033 }
8034 fprintf(outfile, "\n");
8035 break;
8036 }
8037
8038 break; /* Out of the /g loop */
8039 } /* End of failed match handling */
8040
8041 /* Control reaches here in two circumstances: (a) after a match, and (b)
8042 after a non-match that immediately followed a match on an empty string when
8043 doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
8044 PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
8045 of one character. So effectively we get here only after a match. If we
8046 are not doing a global search, we are done. */
8047
8048 if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
8049 {
8050 PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
8051 PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
8052
8053 /* We must now set up for the next iteration of a global search. If we have
8054 matched an empty string, first check to see if we are at the end of the
8055 subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
8056 does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
8057 at the same point. If this fails it will be picked up above, where a fake
8058 match is set up so that at this point we advance to the next character.
8059
8060 However, in order to cope with patterns that never match at their starting
8061 offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
8062 than the starting offset. This means there will be a retry with the
8063 starting offset at the match offset. If this returns the same match again,
8064 it is picked up above and ignored, and the special action is then taken. */
8065
8066 if (match_offset == end_offset)
8067 {
8068 if (end_offset == ulen) break; /* End of subject */
8069 if (match_offset <= dat_datctl.offset)
8070 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
8071 }
8072
8073 /* However, even after matching a non-empty string, there is still one
8074 tricky case. If a pattern contains \K within a lookbehind assertion at the
8075 start, the end of the matched string can be at the offset where the match
8076 started. In the case of a normal /g iteration without special action, this
8077 leads to a loop that keeps on returning the same substring. The loop would
8078 be caught above, but we really want to move on to the next match. */
8079
8080 else
8081 {
8082 g_notempty = 0; /* Set for a "normal" repeat */
8083 if ((dat_datctl.control & CTL_GLOBAL) != 0)
8084 {
8085 PCRE2_SIZE startchar;
8086 PCRE2_GET_STARTCHAR(startchar, match_data);
8087 if (end_offset <= startchar)
8088 {
8089 if (startchar >= ulen) break; /* End of subject */
8090 end_offset = startchar + 1;
8091 if (utf && test_mode != PCRE32_MODE)
8092 {
8093 if (test_mode == PCRE8_MODE)
8094 {
8095 for (; end_offset < ulen; end_offset++)
8096 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8097 }
8098 else /* 16-bit mode */
8099 {
8100 for (; end_offset < ulen; end_offset++)
8101 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8102 }
8103 }
8104 }
8105 }
8106 }
8107
8108 /* For a normal global (/g) iteration, save the current ovector[0,1] and
8109 the starting offset so that we can check that they do change each time.
8110 Otherwise a matching bug that returns the same string causes an infinite
8111 loop. It has happened! Then update the start offset, leaving other
8112 parameters alone. */
8113
8114 if ((dat_datctl.control & CTL_GLOBAL) != 0)
8115 {
8116 ovecsave[0] = ovector[0];
8117 ovecsave[1] = ovector[1];
8118 ovecsave[2] = dat_datctl.offset;
8119 dat_datctl.offset = end_offset;
8120 }
8121
8122 /* For altglobal, just update the pointer and length. */
8123
8124 else
8125 {
8126 pp += end_offset * code_unit_size;
8127 len -= end_offset * code_unit_size;
8128 ulen -= end_offset;
8129 if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
8130 }
8131 }
8132 } /* End of global loop */
8133
8134show_memory = FALSE;
8135return PR_OK;
8136}
8137
8138
8139
8140
8141/*************************************************
8142* Print PCRE2 version *
8143*************************************************/
8144
8145static void
8146print_version(FILE *f)
8147{
8148VERSION_TYPE *vp;
8149fprintf(f, "PCRE2 version ");
8150for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
8151fprintf(f, "\n");
8152}
8153
8154
8155
8156/*************************************************
8157* Print Unicode version *
8158*************************************************/
8159
8160static void
8161print_unicode_version(FILE *f)
8162{
8163VERSION_TYPE *vp;
8164fprintf(f, "Unicode version ");
8165for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
8166}
8167
8168
8169
8170/*************************************************
8171* Print JIT target *
8172*************************************************/
8173
8174static void
8175print_jit_target(FILE *f)
8176{
8177VERSION_TYPE *vp;
8178for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
8179}
8180
8181
8182
8183/*************************************************
8184* Print newline configuration *
8185*************************************************/
8186
8187/* Output is always to stdout.
8188
8189Arguments:
8190 rc the return code from PCRE2_CONFIG_NEWLINE
8191 isc TRUE if called from "-C newline"
8192Returns: nothing
8193*/
8194
8195static void
8196print_newline_config(uint32_t optval, BOOL isc)
8197{
8198if (!isc) printf(" Default newline sequence is ");
8199if (optval < sizeof(newlines)/sizeof(char *))
8200 printf("%s\n", newlines[optval]);
8201else
8202 printf("a non-standard value: %d\n", optval);
8203}
8204
8205
8206
8207/*************************************************
8208* Usage function *
8209*************************************************/
8210
static void
usage(void)
{
/* The help text is held as a table of lines, written to stdout in order. The
conditional entries depend on which line-editing library and which code unit
widths were selected at build time. */

static const char *const help_text[] = {
  "Usage: pcre2test [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcre2test is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE2_8
  " -8 use the 8-bit library\n",
#endif
#ifdef SUPPORT_PCRE2_16
  " -16 use the 16-bit library\n",
#endif
#ifdef SUPPORT_PCRE2_32
  " -32 use the 32-bit library\n",
#endif
  " -ac set default pattern modifier PCRE2_AUTO_CALLOUT\n",
  " -AC as -ac, but also set subject 'callout_extra' modifier\n",
  " -b set default pattern modifier 'fullbincode'\n",
  " -C show PCRE2 compile-time options and exit\n",
  " -C arg show a specific compile-time option and exit with its\n",
  " value if numeric (else 0). The arg can be:\n",
  " backslash-C use of \\C is enabled [0, 1]\n",
  " bsr \\R type [ANYCRLF, ANY]\n",
  " ebcdic compiled for EBCDIC character code [0,1]\n",
  " ebcdic-nl NL code if compiled for EBCDIC\n",
  " jit just-in-time compiler supported [0, 1]\n",
  " linksize internal link size [2, 3, 4]\n",
  " newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n",
  " pcre2-8 8 bit library support enabled [0, 1]\n",
  " pcre2-16 16 bit library support enabled [0, 1]\n",
  " pcre2-32 32 bit library support enabled [0, 1]\n",
  " unicode Unicode and UTF support enabled [0, 1]\n",
  " -d set default pattern modifier 'debug'\n",
  " -dfa set default subject modifier 'dfa'\n",
  " -error <n,m,..> show messages for error numbers, then exit\n",
  " -help show usage information\n",
  " -i set default pattern modifier 'info'\n",
  " -jit set default pattern modifier 'jit'\n",
  " -jitfast set default pattern modifier 'jitfast'\n",
  " -jitverify set default pattern modifier 'jitverify'\n",
  " -LM list pattern and subject modifiers, then exit\n",
  " -LP list non-script properties, then exit\n",
  " -LS list supported scripts, then exit\n",
  " -q quiet: do not output PCRE2 version number at start\n",
  " -pattern <s> set default pattern modifier fields\n",
  " -subject <s> set default subject modifier fields\n",
  " -S <n> set stack size to <n> mebibytes\n",
  " -t [<n>] time compilation and execution, repeating <n> times\n",
  " -tm [<n>] time execution (matching) only, repeating <n> times\n",
  " -T same as -t, but show total times at the end\n",
  " -TM same as -tm, but show total time at the end\n",
  " -version show PCRE2 version and exit\n"
};

for (size_t i = 0; i < sizeof(help_text)/sizeof(help_text[0]); i++)
  fputs(help_text[i], stdout);
}
8269
8270
8271
8272/*************************************************
8273* Handle -C option *
8274*************************************************/
8275
8276/* This option outputs configuration options and sets an appropriate return
8277code when asked for a single option. The code is abstracted into a separate
8278function because of its size. Use whichever pcre2_config() function is
8279available.
8280
8281Argument: an option name or NULL
8282Returns: the return code
8283*/
8284
8285static int
8286c_option(const char *arg)
8287{
8288uint32_t optval;
8289unsigned int i = COPTLISTCOUNT;
8290int yield = 0;
8291
8292if (arg != NULL && arg[0] != CHAR_MINUS)
8293 {
8294 for (i = 0; i < COPTLISTCOUNT; i++)
8295 if (strcmp(arg, coptlist[i].name) == 0) break;
8296
8297 if (i >= COPTLISTCOUNT)
8298 {
8299 fprintf(stderr, "** Unknown -C option '%s'\n", arg);
8300 return 0;
8301 }
8302
8303 switch (coptlist[i].type)
8304 {
8305 case CONF_BSR:
8306 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8307 printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
8308 break;
8309
8310 case CONF_FIX:
8311 yield = coptlist[i].value;
8312 printf("%d\n", yield);
8313 break;
8314
8315 case CONF_FIZ:
8316 optval = coptlist[i].value;
8317 printf("%d\n", optval);
8318 break;
8319
8320 case CONF_INT:
8321 (void)PCRE2_CONFIG(coptlist[i].value, &yield);
8322 printf("%d\n", yield);
8323 break;
8324
8325 case CONF_NL:
8326 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8327 print_newline_config(optval, TRUE);
8328 break;
8329 }
8330
8331/* For VMS, return the value by setting a symbol, for certain values only. This
8332is contributed code which the PCRE2 developers have no means of testing. */
8333
8334#ifdef __VMS
8335
8336/* This is the original code provided by the first VMS contributor. */
8337#ifdef NEVER
8338 if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8339 {
8340 char ucname[16];
8341 strcpy(ucname, coptlist[i].name);
8342 for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
8343 vms_setsymbol(ucname, 0, optval);
8344 }
8345#endif
8346
8347/* This is the new code, provided by a second VMS contributor. */
8348
8349 if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8350 {
8351 char nam_buf[22], val_buf[4];
8352 $DESCRIPTOR(nam, nam_buf);
8353 $DESCRIPTOR(val, val_buf);
8354
8355 strcpy(nam_buf, coptlist[i].name);
8356 nam.dsc$w_length = strlen(nam_buf);
8357 sprintf(val_buf, "%d", yield);
8358 val.dsc$w_length = strlen(val_buf);
8359 lib$set_symbol(&nam, &val);
8360 }
8361#endif /* __VMS */
8362
8363 return yield;
8364 }
8365
8366/* No argument for -C: output all configuration information. */
8367
8368print_version(stdout);
8369printf("Compiled with\n");
8370
8371#ifdef EBCDIC
8372printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
8373#if defined NATIVE_ZOS
8374printf(" EBCDIC code page %s or similar\n", pcrz_cpversion());
8375#endif
8376#endif
8377
8378(void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
8379if (optval & 1) printf(" 8-bit support\n");
8380if (optval & 2) printf(" 16-bit support\n");
8381if (optval & 4) printf(" 32-bit support\n");
8382
8383#ifdef SUPPORT_VALGRIND
8384printf(" Valgrind support\n");
8385#endif
8386
8387(void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
8388if (optval != 0)
8389 {
8390 printf(" UTF and UCP support (");
8391 print_unicode_version(stdout);
8392 printf(")\n");
8393 }
8394else printf(" No Unicode support\n");
8395
8396(void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
8397if (optval != 0)
8398 {
8399 printf(" Just-in-time compiler support: ");
8400 print_jit_target(stdout);
8401 printf("\n");
8402 }
8403else
8404 {
8405 printf(" No just-in-time compiler support\n");
8406 }
8407
8408(void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
8409print_newline_config(optval, FALSE);
8410(void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
8411printf(" \\R matches %s\n",
8412 (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
8413 "all Unicode newlines");
8414(void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
8415printf(" \\C is %ssupported\n", optval? "not ":"");
8416(void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
8417printf(" Internal link size = %d\n", optval);
8418(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
8419printf(" Parentheses nest limit = %d\n", optval);
8420(void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
8421printf(" Default heap limit = %d kibibytes\n", optval);
8422(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
8423printf(" Default match limit = %d\n", optval);
8424(void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
8425printf(" Default depth limit = %d\n", optval);
8426
8427#if defined SUPPORT_LIBREADLINE
8428printf(" pcre2test has libreadline support\n");
8429#elif defined SUPPORT_LIBEDIT
8430printf(" pcre2test has libedit support\n");
8431#else
8432printf(" pcre2test has neither libreadline nor libedit support\n");
8433#endif
8434
8435return 0;
8436}
8437
8438
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008439/*************************************************
8440* Format one property/script list item *
8441*************************************************/
8442
#ifdef SUPPORT_UNICODE
/* Format one list entry into buff as "name" or "name (synonym, synonym...)".

Arguments:
  ff         list of offsets into the utt_names table, ended by a negative
               value
  buff       where to build the text; no length check is made here, so the
               caller must supply a buffer that is large enough
  isscript   TRUE when the names are script names
Returns:     nothing
*/

static void
format_list_item(int16_t *ff, char *buff, BOOL isscript)
{
int total;
int lead = 0;
int settled = 0;
size_t leadlen = 0;
const char *leadname = "";
const char *sep = " (";

/* One pass both counts the names and picks the one to show first. For
scripts, the first 3-character name wins outright and later names are not
considered; otherwise the earliest of the longest names is taken. */

for (total = 0; ff[total] >= 0; total++)
  {
  const char *name;
  size_t namelen;

  if (settled) continue;

  name = PRIV(utt_names) + ff[total];
  namelen = strlen(name);

  if (isscript && namelen == 3) settled = 1;
  else if (namelen <= leadlen) continue;

  lead = total;
  leadlen = namelen;
  leadname = name;
  }

buff += sprintf(buff, "%s", leadname);

/* A single name has no synonyms to list. */

if (total <= 1) return;

/* Append every other name, in table order, inside parentheses. */

for (int idx = 0; idx < total; idx++)
  {
  if (idx != lead)
    {
    buff += sprintf(buff, "%s%s", sep, PRIV(utt_names) + ff[idx]);
    sep = ", ";
    }
  }

strcpy(buff, ")");
}
#endif  /* SUPPORT_UNICODE */
8492
8493
8494
8495/*************************************************
8496* Display scripts or properties *
8497*************************************************/
8498
8499#define MAX_SYNONYMS 5
8500
8501static void
8502display_properties(BOOL wantscripts)
8503{
8504#ifndef SUPPORT_UNICODE
8505(void)wantscripts;
8506printf("** This version of PCRE2 was compiled without Unicode support.\n");
8507#else
8508
8509const char *typename;
8510uint16_t seentypes[1024];
8511uint16_t seenvalues[1024];
8512int seencount = 0;
8513int16_t found[256][MAX_SYNONYMS + 1];
8514int fc = 0;
8515int colwidth = 40;
8516int n;
8517
8518if (wantscripts)
8519 {
8520 n = ucp_Script_Count;
8521 typename = "SCRIPTS";
8522 }
8523else
8524 {
8525 n = ucp_Bprop_Count;
8526 typename = "PROPERTIES";
8527 }
8528
8529for (size_t i = 0; i < PRIV(utt_size); i++)
8530 {
8531 int k;
8532 int m = 0;
8533 int16_t *fv;
8534 const ucp_type_table *t = PRIV(utt) + i;
8535 unsigned int value = t->value;
8536
8537 if (wantscripts)
8538 {
8539 if (t->type != PT_SC && t->type != PT_SCX) continue;
8540 }
8541 else
8542 {
8543 if (t->type != PT_BOOL) continue;
8544 }
8545
8546 for (k = 0; k < seencount; k++)
8547 {
8548 if (t->type == seentypes[k] && t->value == seenvalues[k]) break;
8549 }
8550 if (k < seencount) continue;
8551
8552 seentypes[seencount] = t->type;
8553 seenvalues[seencount++] = t->value;
8554
8555 fv = found[fc++];
8556 fv[m++] = t->name_offset;
8557
8558 for (size_t j = i + 1; j < PRIV(utt_size); j++)
8559 {
8560 const ucp_type_table *tt = PRIV(utt) + j;
8561 if (tt->type != t->type || tt->value != value) continue;
8562 if (m >= MAX_SYNONYMS)
8563 printf("** Too many synonyms: %s ignored\n",
8564 PRIV(utt_names) + tt->name_offset);
8565 else fv[m++] = tt->name_offset;
8566 }
8567
8568 fv[m] = -1;
8569 }
8570
8571printf("-------------------------- SUPPORTED %s --------------------------\n\n",
8572 typename);
8573
8574if (!wantscripts) printf(
8575"This release of PCRE2 supports Unicode's general category properties such\n"
8576"as Lu (upper case letter), bi-directional properties such as Bidi_Class,\n"
8577"and the following binary (yes/no) properties:\n\n");
8578
8579
8580for (int k = 0; k < (n+1)/2; k++)
8581 {
8582 int x;
8583 char buff1[128];
8584 char buff2[128];
8585
8586 format_list_item(found[k], buff1, wantscripts);
8587 x = k + (n+1)/2;
8588 if (x < n) format_list_item(found[x], buff2, wantscripts);
8589 else buff2[0] = 0;
8590
8591 x = printf("%s", buff1);
8592 while (x++ < colwidth) printf(" ");
8593 printf("%s\n", buff2);
8594 }
8595
8596#endif /* SUPPORT_UNICODE */
8597}
8598
8599
Elliott Hughes5b808042021-10-01 10:56:10 -07008600
8601/*************************************************
8602* Display one modifier *
8603*************************************************/
8604
8605static void
8606display_one_modifier(modstruct *m, BOOL for_pattern)
8607{
8608uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
8609 '*' : ' ';
8610printf("%c%s", c, m->name);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008611for (size_t i = 0; i < C1MODLISTCOUNT; i++)
8612 {
8613 if (strcmp(m->name, c1modlist[i].fullname) == 0)
8614 printf(" (%c)", c1modlist[i].onechar);
8615 }
Elliott Hughes5b808042021-10-01 10:56:10 -07008616}
8617
8618
8619
8620/*************************************************
8621* Display pattern or subject modifiers *
8622*************************************************/
8623
8624/* In order to print in two columns, first scan without printing to get a list
8625of the modifiers that are required.
8626
8627Arguments:
8628 for_pattern TRUE for pattern modifiers, FALSE for subject modifiers
8629 title string to be used in title
8630
8631Returns: nothing
8632*/
8633
8634static void
8635display_selected_modifiers(BOOL for_pattern, const char *title)
8636{
8637uint32_t i, j;
8638uint32_t n = 0;
8639uint32_t list[MODLISTCOUNT];
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008640uint32_t extra[MODLISTCOUNT];
Elliott Hughes5b808042021-10-01 10:56:10 -07008641
8642for (i = 0; i < MODLISTCOUNT; i++)
8643 {
8644 BOOL is_pattern = TRUE;
8645 modstruct *m = modlist + i;
8646
8647 switch (m->which)
8648 {
8649 case MOD_CTC: /* Compile context */
8650 case MOD_PAT: /* Pattern */
8651 case MOD_PATP: /* Pattern, OK for Perl-compatible test */
8652 break;
8653
8654 /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8655 subjects, but can be given with a pattern. We list them as subject
8656 modifiers, but marked with an asterisk.*/
8657
8658 case MOD_CTM: /* Match context */
8659 case MOD_DAT: /* Subject line */
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008660 case MOD_DATP: /* Subject line, OK for Perl-compatible test */
Elliott Hughes5b808042021-10-01 10:56:10 -07008661 case MOD_PND: /* As PD, but not default pattern */
8662 case MOD_PNDP: /* As PND, OK for Perl-compatible test */
8663 is_pattern = FALSE;
8664 break;
8665
8666 default: printf("** Unknown type for modifier '%s'\n", m->name);
8667 /* Fall through */
8668 case MOD_PD: /* Pattern or subject */
8669 case MOD_PDP: /* As PD, OK for Perl-compatible test */
8670 is_pattern = for_pattern;
8671 break;
8672 }
8673
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008674 if (for_pattern == is_pattern)
8675 {
8676 extra[n] = 0;
8677 for (size_t k = 0; k < C1MODLISTCOUNT; k++)
8678 {
8679 if (strcmp(m->name, c1modlist[k].fullname) == 0)
8680 {
8681 extra[n] += 4;
8682 break;
8683 }
8684 }
8685 list[n++] = i;
8686 }
Elliott Hughes5b808042021-10-01 10:56:10 -07008687 }
8688
8689/* Now print from the list in two columns. */
8690
8691printf("-------------- %s MODIFIERS --------------\n", title);
8692
8693for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8694 {
8695 modstruct *m = modlist + list[i];
8696 display_one_modifier(m, for_pattern);
8697 if (j < n)
8698 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008699 uint32_t k = 27 - strlen(m->name) - extra[i];
Elliott Hughes5b808042021-10-01 10:56:10 -07008700 while (k-- > 0) printf(" ");
8701 display_one_modifier(modlist + list[j], for_pattern);
8702 }
8703 printf("\n");
8704 }
8705}
8706
8707
8708
8709/*************************************************
8710* Display the list of modifiers *
8711*************************************************/
8712
8713static void
8714display_modifiers(void)
8715{
8716printf(
8717 "An asterisk on a subject modifier means that it may be given on a pattern\n"
8718 "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8719 "that are listed for both patterns and subjects have different effects in\n"
8720 "each case.\n\n");
8721display_selected_modifiers(TRUE, "PATTERN");
8722printf("\n");
8723display_selected_modifiers(FALSE, "SUBJECT");
8724}
8725
8726
8727
8728/*************************************************
8729* Main Program *
8730*************************************************/
8731
8732int
8733main(int argc, char **argv)
8734{
8735uint32_t temp;
8736uint32_t yield = 0;
8737uint32_t op = 1;
8738BOOL notdone = TRUE;
8739BOOL quiet = FALSE;
8740BOOL showtotaltimes = FALSE;
8741BOOL skipping = FALSE;
8742char *arg_subject = NULL;
8743char *arg_pattern = NULL;
8744char *arg_error = NULL;
8745
8746/* The offsets to the options and control bits fields of the pattern and data
8747control blocks must be the same so that common options and controls such as
8748"anchored" or "memory" can work for either of them from a single table entry.
8749We cannot test this till runtime because "offsetof" does not work in the
8750preprocessor. */
8751
8752if (PO(options) != DO(options) || PO(control) != DO(control) ||
8753 PO(control2) != DO(control2))
8754 {
8755 fprintf(stderr, "** Coding error: "
8756 "options and control offsets for pattern and data must be the same.\n");
8757 return 1;
8758 }
8759
8760/* Get the PCRE2 and Unicode version number and JIT target information, at the
8761same time checking that a request for the length gives the same answer. Also
8762check lengths for non-string items. */
8763
8764if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
8765 PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
8766
8767 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
8768 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
8769
8770 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
8771 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
8772
8773 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
8774 PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
8775 {
8776 fprintf(stderr, "** Error in pcre2_config(): bad length\n");
8777 return 1;
8778 }
8779
8780/* Check that bad options are diagnosed. */
8781
8782if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
8783 PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
8784 {
8785 fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
8786 return 1;
8787 }
8788
8789/* This configuration option is now obsolete, but running a quick check ensures
8790that its code is covered. */
8791
8792(void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
8793
8794/* Get buffers from malloc() so that valgrind will check their misuse when
8795debugging. They grow automatically when very long lines are read. The 16-
8796and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
8797
8798buffer = (uint8_t *)malloc(pbuffer8_size);
8799pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
8800
8801/* The following _setmode() stuff is some Windows magic that tells its runtime
8802library to translate CRLF into a single LF character. At least, that's what
8803I've been told: never having used Windows I take this all on trust. Originally
8804it set 0x8000, but then I was advised that _O_BINARY was better. */
8805
8806#if defined(_WIN32) || defined(WIN32)
8807_setmode( _fileno( stdout ), _O_BINARY );
8808#endif
8809
8810/* Initialization that does not depend on the running mode. */
8811
8812locale_name[0] = 0;
8813
8814memset(&def_patctl, 0, sizeof(patctl));
8815def_patctl.convert_type = CONVERT_UNSET;
8816
8817memset(&def_datctl, 0, sizeof(datctl));
8818def_datctl.oveccount = DEFAULT_OVECCOUNT;
8819def_datctl.copy_numbers[0] = -1;
8820def_datctl.get_numbers[0] = -1;
8821def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
8822def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
8823def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
8824
8825/* Scan command line options. */
8826
8827while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
8828 {
8829 char *endptr;
8830 char *arg = argv[op];
8831 unsigned long uli;
8832
8833 /* List modifiers and exit. */
8834
8835 if (strcmp(arg, "-LM") == 0)
8836 {
8837 display_modifiers();
8838 goto EXIT;
8839 }
8840
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008841 /* List properties and exit */
8842
8843 if (strcmp(arg, "-LP") == 0)
8844 {
8845 display_properties(FALSE);
8846 goto EXIT;
8847 }
8848
8849 /* List scripts and exit */
8850
8851 if (strcmp(arg, "-LS") == 0)
8852 {
8853 display_properties(TRUE);
8854 goto EXIT;
8855 }
8856
Elliott Hughes5b808042021-10-01 10:56:10 -07008857 /* Display and/or set return code for configuration options. */
8858
8859 if (strcmp(arg, "-C") == 0)
8860 {
8861 yield = c_option(argv[op + 1]);
8862 goto EXIT;
8863 }
8864
8865 /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
8866 and 32-bit modes because that won't happen naturally when 8-bit is also
8867 configured. Also call some other functions that are not otherwise used. This
8868 means that a coverage report won't claim there are uncalled functions. */
8869
8870 if (strcmp(arg, "-8") == 0)
8871 {
8872#ifdef SUPPORT_PCRE2_8
8873 test_mode = PCRE8_MODE;
8874 (void)pcre2_set_bsr_8(pat_context8, 999);
8875 (void)pcre2_set_newline_8(pat_context8, 999);
8876#else
8877 fprintf(stderr,
8878 "** This version of PCRE2 was built without 8-bit support\n");
8879 exit(1);
8880#endif
8881 }
8882
8883 else if (strcmp(arg, "-16") == 0)
8884 {
8885#ifdef SUPPORT_PCRE2_16
8886 test_mode = PCRE16_MODE;
8887 (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
8888 (void)pcre2_set_bsr_16(pat_context16, 999);
8889 (void)pcre2_set_newline_16(pat_context16, 999);
8890#else
8891 fprintf(stderr,
8892 "** This version of PCRE2 was built without 16-bit support\n");
8893 exit(1);
8894#endif
8895 }
8896
8897 else if (strcmp(arg, "-32") == 0)
8898 {
8899#ifdef SUPPORT_PCRE2_32
8900 test_mode = PCRE32_MODE;
8901 (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
8902 (void)pcre2_set_bsr_32(pat_context32, 999);
8903 (void)pcre2_set_newline_32(pat_context32, 999);
8904#else
8905 fprintf(stderr,
8906 "** This version of PCRE2 was built without 32-bit support\n");
8907 exit(1);
8908#endif
8909 }
8910
8911 /* Set quiet (no version verification) */
8912
8913 else if (strcmp(arg, "-q") == 0) quiet = TRUE;
8914
8915 /* Set system stack size */
8916
8917 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
8918 ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
8919 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008920#if defined(_WIN32) || defined(WIN32) || defined(__HAIKU__) || defined(NATIVE_ZOS) || defined(__VMS)
Elliott Hughes5b808042021-10-01 10:56:10 -07008921 fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
8922 exit(1);
8923#else
8924 int rc;
8925 uint32_t stack_size;
8926 struct rlimit rlim;
8927 if (U32OVERFLOW(uli))
8928 {
8929 fprintf(stderr, "** Argument for -S is too big\n");
8930 exit(1);
8931 }
8932 stack_size = (uint32_t)uli;
8933 getrlimit(RLIMIT_STACK, &rlim);
8934 rlim.rlim_cur = stack_size * 1024 * 1024;
8935 if (rlim.rlim_cur > rlim.rlim_max)
8936 {
8937 fprintf(stderr,
8938 "pcre2test: requested stack size %luMiB is greater than hard limit "
8939 "%luMiB\n", (unsigned long int)stack_size,
8940 (unsigned long int)(rlim.rlim_max));
8941 exit(1);
8942 }
8943 rc = setrlimit(RLIMIT_STACK, &rlim);
8944 if (rc != 0)
8945 {
8946 fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
8947 (unsigned long int)stack_size, strerror(errno));
8948 exit(1);
8949 }
8950 op++;
8951 argc--;
8952#endif
8953 }
8954
8955 /* Set some common pattern and subject controls */
8956
8957 else if (strcmp(arg, "-AC") == 0)
8958 {
8959 def_patctl.options |= PCRE2_AUTO_CALLOUT;
8960 def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
8961 }
8962 else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT;
8963 else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
8964 else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
8965 else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
8966 else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
8967 else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0 ||
8968 strcmp(arg, "-jitfast") == 0)
8969 {
8970 if (arg[4] == 'v') def_patctl.control |= CTL_JITVERIFY;
8971 else if (arg[4] == 'f') def_patctl.control |= CTL_JITFAST;
8972 def_patctl.jit = JIT_DEFAULT; /* full & partial */
8973#ifndef SUPPORT_JIT
8974 fprintf(stderr, "** Warning: JIT support is not available: "
8975 "-jit[fast|verify] calls functions that do nothing.\n");
8976#endif
8977 }
8978
8979 /* Set timing parameters */
8980
8981 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
8982 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
8983 {
8984 int both = arg[2] == 0;
8985 showtotaltimes = arg[1] == 'T';
8986 if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
8987 {
8988 if (uli == 0)
8989 {
8990 fprintf(stderr, "** Argument for %s must not be zero\n", arg);
8991 exit(1);
8992 }
8993 if (U32OVERFLOW(uli))
8994 {
8995 fprintf(stderr, "** Argument for %s is too big\n", arg);
8996 exit(1);
8997 }
8998 timeitm = (int)uli;
8999 op++;
9000 argc--;
9001 }
9002 else timeitm = LOOPREPEAT;
9003 if (both) timeit = timeitm;
9004 }
9005
9006 /* Give help */
9007
9008 else if (strcmp(arg, "-help") == 0 ||
9009 strcmp(arg, "--help") == 0)
9010 {
9011 usage();
9012 goto EXIT;
9013 }
9014
9015 /* Show version */
9016
9017 else if (strcmp(arg, "-version") == 0 ||
9018 strcmp(arg, "--version") == 0)
9019 {
9020 print_version(stdout);
9021 goto EXIT;
9022 }
9023
9024 /* The following options save their data for processing once we know what
9025 the running mode is. */
9026
9027 else if (strcmp(arg, "-error") == 0)
9028 {
9029 arg_error = argv[op+1];
9030 goto CHECK_VALUE_EXISTS;
9031 }
9032
9033 else if (strcmp(arg, "-subject") == 0)
9034 {
9035 arg_subject = argv[op+1];
9036 goto CHECK_VALUE_EXISTS;
9037 }
9038
9039 else if (strcmp(arg, "-pattern") == 0)
9040 {
9041 arg_pattern = argv[op+1];
9042 CHECK_VALUE_EXISTS:
9043 if (argc <= 2)
9044 {
9045 fprintf(stderr, "** Missing value for %s\n", arg);
9046 yield = 1;
9047 goto EXIT;
9048 }
9049 op++;
9050 argc--;
9051 }
9052
9053 /* Unrecognized option */
9054
9055 else
9056 {
9057 fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
9058 usage();
9059 yield = 1;
9060 goto EXIT;
9061 }
9062 op++;
9063 argc--;
9064 }
9065
9066/* If -error was present, get the error numbers, show the messages, and exit.
9067We wait to do this until we know which mode we are in. */
9068
9069if (arg_error != NULL)
9070 {
9071 int len;
9072 int errcode;
9073 char *endptr;
9074
9075/* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
9076least 128 code units, because it is used for retrieving error messages. */
9077
9078#ifdef SUPPORT_PCRE2_16
9079 if (test_mode == PCRE16_MODE)
9080 {
9081 pbuffer16_size = 256;
9082 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
9083 if (pbuffer16 == NULL)
9084 {
9085 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
Elliott Hughes16619d62021-10-29 12:10:38 -07009086 pbuffer16_size);
Elliott Hughes5b808042021-10-01 10:56:10 -07009087 yield = 1;
9088 goto EXIT;
9089 }
9090 }
9091#endif
9092
9093#ifdef SUPPORT_PCRE2_32
9094 if (test_mode == PCRE32_MODE)
9095 {
9096 pbuffer32_size = 512;
9097 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
9098 if (pbuffer32 == NULL)
9099 {
9100 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
Elliott Hughes16619d62021-10-29 12:10:38 -07009101 pbuffer32_size);
Elliott Hughes5b808042021-10-01 10:56:10 -07009102 yield = 1;
9103 goto EXIT;
9104 }
9105 }
9106#endif
9107
9108 /* Loop along a list of error numbers. */
9109
9110 for (;;)
9111 {
9112 errcode = strtol(arg_error, &endptr, 10);
9113 if (*endptr != 0 && *endptr != CHAR_COMMA)
9114 {
9115 fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
9116 yield = 1;
9117 goto EXIT;
9118 }
9119 printf("Error %d: ", errcode);
9120 PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
9121 if (len < 0)
9122 {
9123 switch (len)
9124 {
9125 case PCRE2_ERROR_BADDATA:
9126 printf("PCRE2_ERROR_BADDATA (unknown error number)");
9127 break;
9128
9129 case PCRE2_ERROR_NOMEMORY:
9130 printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
9131 break;
9132
9133 default:
9134 printf("Unexpected return (%d) from pcre2_get_error_message()", len);
9135 break;
9136 }
9137 }
9138 else
9139 {
9140 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
9141 }
9142 printf("\n");
9143 if (*endptr == 0) goto EXIT;
9144 arg_error = endptr + 1;
9145 }
9146 /* Control never reaches here */
9147 } /* End of -error handling */
9148
9149/* Initialize things that cannot be done until we know which test mode we are
9150running in. Exercise the general context copying and match data size functions,
9151which are not otherwise used. */
9152
9153code_unit_size = test_mode/8;
9154max_oveccount = DEFAULT_OVECCOUNT;
9155
9156/* Use macros to save a lot of duplication. */
9157
9158#define CREATECONTEXTS \
9159 G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
9160 G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
9161 G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
9162 G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
9163 G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
9164 G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
9165 G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
9166 G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
9167 G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
9168
9169#define CONTEXTTESTS \
9170 (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
9171 (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
9172 (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
9173 (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL); \
9174 (void)G(pcre2_get_match_data_size_,BITS)(G(match_data,BITS))
9175
9176
9177/* Call the appropriate functions for the current mode, and exercise some
9178functions that are not otherwise called. */
9179
9180#ifdef SUPPORT_PCRE2_8
9181#undef BITS
9182#define BITS 8
9183if (test_mode == PCRE8_MODE)
9184 {
9185 CREATECONTEXTS;
9186 CONTEXTTESTS;
9187 }
9188#endif
9189
9190#ifdef SUPPORT_PCRE2_16
9191#undef BITS
9192#define BITS 16
9193if (test_mode == PCRE16_MODE)
9194 {
9195 CREATECONTEXTS;
9196 CONTEXTTESTS;
9197 }
9198#endif
9199
9200#ifdef SUPPORT_PCRE2_32
9201#undef BITS
9202#define BITS 32
9203if (test_mode == PCRE32_MODE)
9204 {
9205 CREATECONTEXTS;
9206 CONTEXTTESTS;
9207 }
9208#endif
9209
9210/* Set a default parentheses nest limit that is large enough to run the
9211standard tests (this also exercises the function). */
9212
9213PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
9214
9215/* Handle command line modifier settings, sending any error messages to
9216stderr. We need to know the mode before modifying the context, and it is tidier
9217to do them all in the same way. */
9218
9219outfile = stderr;
9220if ((arg_pattern != NULL &&
9221 !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
9222 (arg_subject != NULL &&
9223 !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
9224 {
9225 yield = 1;
9226 goto EXIT;
9227 }
9228
9229/* Sort out the input and output files, defaulting to stdin/stdout. */
9230
9231infile = stdin;
9232outfile = stdout;
9233
9234if (argc > 1 && strcmp(argv[op], "-") != 0)
9235 {
9236 infile = fopen(argv[op], INPUT_MODE);
9237 if (infile == NULL)
9238 {
9239 printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
9240 yield = 1;
9241 goto EXIT;
9242 }
9243 }
9244
9245#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9246if (INTERACTIVE(infile)) using_history();
9247#endif
9248
9249if (argc > 2)
9250 {
9251 outfile = fopen(argv[op+1], OUTPUT_MODE);
9252 if (outfile == NULL)
9253 {
9254 printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
9255 yield = 1;
9256 goto EXIT;
9257 }
9258 }
9259
9260/* Output a heading line unless quiet, then process input lines. */
9261
9262if (!quiet) print_version(outfile);
9263
9264SET(compiled_code, NULL);
9265
9266#ifdef SUPPORT_PCRE2_8
9267preg.re_pcre2_code = NULL;
9268preg.re_match_data = NULL;
9269#endif
9270
9271while (notdone)
9272 {
9273 uint8_t *p;
9274 int rc = PR_OK;
9275 BOOL expectdata = TEST(compiled_code, !=, NULL);
9276#ifdef SUPPORT_PCRE2_8
9277 expectdata |= preg.re_pcre2_code != NULL;
9278#endif
9279
9280 if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL)
9281 break;
9282 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
9283 fflush(outfile);
9284 p = buffer;
9285
9286 /* If we have a pattern set up for testing, or we are skipping after a
9287 compile failure, a blank line terminates this test. */
9288
9289 if (expectdata || skipping)
9290 {
9291 while (isspace(*p)) p++;
9292 if (*p == 0)
9293 {
9294#ifdef SUPPORT_PCRE2_8
9295 if (preg.re_pcre2_code != NULL)
9296 {
9297 regfree(&preg);
9298 preg.re_pcre2_code = NULL;
9299 preg.re_match_data = NULL;
9300 }
9301#endif /* SUPPORT_PCRE2_8 */
9302 if (TEST(compiled_code, !=, NULL))
9303 {
9304 SUB1(pcre2_code_free, compiled_code);
9305 SET(compiled_code, NULL);
9306 }
9307 skipping = FALSE;
9308 setlocale(LC_CTYPE, "C");
9309 }
9310
9311 /* Otherwise, if we are not skipping, and the line is not a data comment
9312 line starting with "\=", process a data line. */
9313
9314 else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
9315 {
9316 rc = process_data();
9317 }
9318 }
9319
9320 /* We do not have a pattern set up for testing. Lines starting with # are
9321 either comments or special commands. Blank lines are ignored. Otherwise, the
9322 line must start with a valid delimiter. It is then processed as a pattern
9323 line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
9324 valgrind, make the unused part of the buffer undefined, to catch overruns. */
9325
9326 else if (*p == '#')
9327 {
9328 if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
9329 rc = process_command();
9330 }
9331
9332 else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
9333 {
9334 rc = process_pattern();
9335 dfa_matched = 0;
9336 }
9337
9338 else
9339 {
9340 while (isspace(*p)) p++;
9341 if (*p != 0)
9342 {
9343 fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
9344 *buffer);
9345 rc = PR_SKIP;
9346 }
9347 }
9348
9349 if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
9350 else if (rc == PR_ABEND)
9351 {
9352 fprintf(outfile, "** pcre2test run abandoned\n");
9353 yield = 1;
9354 goto EXIT;
9355 }
9356 }
9357
9358/* Finish off a normal run. */
9359
9360if (INTERACTIVE(infile)) fprintf(outfile, "\n");
9361
9362if (showtotaltimes)
9363 {
9364 const char *pad = "";
9365 fprintf(outfile, "--------------------------------------\n");
9366 if (timeit > 0)
9367 {
9368 fprintf(outfile, "Total compile time %.4f milliseconds\n",
9369 (((double)total_compile_time * 1000.0) / (double)timeit) /
9370 (double)CLOCKS_PER_SEC);
9371 if (total_jit_compile_time > 0)
9372 fprintf(outfile, "Total JIT compile %.4f milliseconds\n",
9373 (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
9374 (double)CLOCKS_PER_SEC);
9375 pad = " ";
9376 }
9377 fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
9378 (((double)total_match_time * 1000.0) / (double)timeitm) /
9379 (double)CLOCKS_PER_SEC);
9380 }
9381
9382
9383EXIT:
9384
9385#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9386if (infile != NULL && INTERACTIVE(infile)) clear_history();
9387#endif
9388
9389if (infile != NULL && infile != stdin) fclose(infile);
9390if (outfile != NULL && outfile != stdout) fclose(outfile);
9391
9392free(buffer);
9393free(dbuffer);
9394free(pbuffer8);
9395free(dfa_workspace);
9396free((void *)locale_tables);
9397free(tables3);
9398PCRE2_MATCH_DATA_FREE(match_data);
9399SUB1(pcre2_code_free, compiled_code);
9400
9401while(patstacknext-- > 0)
9402 {
9403 SET(compiled_code, patstack[patstacknext]);
9404 SUB1(pcre2_code_free, compiled_code);
9405 }
9406
9407PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
9408if (jit_stack != NULL)
9409 {
9410 PCRE2_JIT_STACK_FREE(jit_stack);
9411 }
9412
/* FREECONTEXTS expands to calls that free, for the code unit width currently
selected by BITS, the general context and its copy, the working and default
compile (pattern) contexts, the working and default match (data) contexts, and
the default and working convert contexts.

NOTE(review): unlike CONTEXTTESTS, this expansion already ends with a
semicolon, so writing "FREECONTEXTS;" produces an additional empty statement.

NOTE(review): below, the macro is invoked once for every width that is
compiled in, without testing test_mode, so it presumably relies on the
contexts of the unused widths being NULL and on the free functions accepting
NULL - TODO confirm against the PCRE2 API documentation. */

9413#define FREECONTEXTS \
9414 G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
9415 G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
9416 G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
9417 G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
9418 G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
9419 G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
9420 G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
9421 G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
9422
9423#ifdef SUPPORT_PCRE2_8
9424#undef BITS
9425#define BITS 8
9426if (preg.re_pcre2_code != NULL) regfree(&preg);
9427FREECONTEXTS;
9428#endif
9429
9430#ifdef SUPPORT_PCRE2_16
9431#undef BITS
9432#define BITS 16
9433free(pbuffer16);
9434FREECONTEXTS;
9435#endif
9436
9437#ifdef SUPPORT_PCRE2_32
9438#undef BITS
9439#define BITS 32
9440free(pbuffer32);
9441FREECONTEXTS;
9442#endif
9443
9444#if defined(__VMS)
9445 yield = SS$_NORMAL; /* Return values via DCL symbols */
9446#endif
9447
9448return yield;
9449}
9450
9451/* End of pcre2test.c */