blob: 3b6a07d422bd3311ffc7b7397f2bb6919950ca16 [file] [log] [blame]
Nick Kralevichf73ff172014-09-27 12:41:49 -07001/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
Janis Danisevskis53e448c2016-03-31 13:35:25 +01009 Original API code Copyright (c) 1997-2012 University of Cambridge
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070010 New API code Copyright (c) 2016-2022 University of Cambridge
Nick Kralevichf73ff172014-09-27 12:41:49 -070011
12-----------------------------------------------------------------------------
13Redistribution and use in source and binary forms, with or without
14modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37POSSIBILITY OF SUCH DAMAGE.
38-----------------------------------------------------------------------------
39*/
40
41
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is #included in pcre2test.c at each supported
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
that comprise the library. It can also optionally be included in
pcre2_compile.c for detailed debugging in error situations. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070048
Nick Kralevichf73ff172014-09-27 12:41:49 -070049
/* Table of operator names, indexed by opcode. The same 8-bit table is used for
all code unit widths, so it must be defined only once; the OP_LISTS_DEFINED
guard prevents a second definition when this file is included again for another
width. The list itself is defined in pcre2_internal.h, which is #included by
pcre2test before this file. */

#ifndef OP_LISTS_DEFINED
static const char *OP_names[] = { OP_NAME_LIST };
#define OP_LISTS_DEFINED
#endif
58
/* The functions and tables herein must all have mode-dependent names, because
this file is included once for each supported code unit width. Each local name
is therefore mapped through PCRE2_SUFFIX, which is set appropriately by the
including file. */

#define OP_lengths            PCRE2_SUFFIX(OP_lengths_)
#define get_ucpname           PCRE2_SUFFIX(get_ucpname_)
#define pcre2_printint        PCRE2_SUFFIX(pcre2_printint_)
#define print_char            PCRE2_SUFFIX(print_char_)
#define print_custring        PCRE2_SUFFIX(print_custring_)
#define print_custring_bylen  PCRE2_SUFFIX(print_custring_bylen_)
#define print_prop            PCRE2_SUFFIX(print_prop_)
Nick Kralevichf73ff172014-09-27 12:41:49 -070068
/* Table of sizes for the fixed-length opcodes, indexed by opcode. It's defined
in a macro so that the definition is next to the definition of the opcodes in
pcre2_internal.h. The contents of the table are, however, mode-dependent, which
is why (unlike OP_names) it has a mode-dependent name and is defined on every
inclusion of this file. */

static const uint8_t OP_lengths[] = { OP_LENGTHS };
Nick Kralevichf73ff172014-09-27 12:41:49 -070074
75
76
77/*************************************************
Janis Danisevskis53e448c2016-03-31 13:35:25 +010078* Print one character from a string *
Nick Kralevichf73ff172014-09-27 12:41:49 -070079*************************************************/
80
Janis Danisevskis53e448c2016-03-31 13:35:25 +010081/* In UTF mode the character may occupy more than one code unit.
82
83Arguments:
84 f file to write to
85 ptr pointer to first code unit of the character
86 utf TRUE if string is UTF (will be FALSE if UTF is not supported)
87
88Returns: number of additional code units used
89*/
90
Nick Kralevichf73ff172014-09-27 12:41:49 -070091static unsigned int
Janis Danisevskis53e448c2016-03-31 13:35:25 +010092print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
Nick Kralevichf73ff172014-09-27 12:41:49 -070093{
Janis Danisevskis53e448c2016-03-31 13:35:25 +010094uint32_t c = *ptr;
95BOOL one_code_unit = !utf;
Nick Kralevichf73ff172014-09-27 12:41:49 -070096
Janis Danisevskis53e448c2016-03-31 13:35:25 +010097/* If UTF is supported and requested, check for a valid single code unit. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070098
Janis Danisevskis53e448c2016-03-31 13:35:25 +010099#ifdef SUPPORT_UNICODE
100if (utf)
101 {
102#if PCRE2_CODE_UNIT_WIDTH == 8
103 one_code_unit = c < 0x80;
104#elif PCRE2_CODE_UNIT_WIDTH == 16
105 one_code_unit = (c & 0xfc00) != 0xd800;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700106#else
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100107 one_code_unit = (c & 0xfffff800u) != 0xd800u;
108#endif /* CODE_UNIT_WIDTH */
109 }
110#endif /* SUPPORT_UNICODE */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700111
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100112/* Handle a valid one-code-unit character at any width. */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700113
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100114if (one_code_unit)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700115 {
116 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
117 else if (c < 0x80) fprintf(f, "\\x%02x", c);
118 else fprintf(f, "\\x{%02x}", c);
119 return 0;
120 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100121
122/* Code for invalid UTF code units and multi-unit UTF characters is different
123for each width. If UTF is not supported, control should never get here, but we
124need a return statement to keep the compiler happy. */
125
126#ifndef SUPPORT_UNICODE
127return 0;
128#else
129
130/* Malformed UTF-8 should occur only if the sanity check has been turned off.
131Rather than swallow random bytes, just stop if we hit a bad one. Print it with
132\X instead of \x as an indication. */
133
134#if PCRE2_CODE_UNIT_WIDTH == 8
135if ((c & 0xc0) != 0xc0)
136 {
137 fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
138 return 0;
139 }
Nick Kralevichf73ff172014-09-27 12:41:49 -0700140else
141 {
142 int i;
143 int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
144 int s = 6*a;
145 c = (c & PRIV(utf8_table3)[a]) << s;
146 for (i = 1; i <= a; i++)
147 {
Nick Kralevichf73ff172014-09-27 12:41:49 -0700148 if ((ptr[i] & 0xc0) != 0x80)
149 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100150 fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700151 return i - 1;
152 }
Nick Kralevichf73ff172014-09-27 12:41:49 -0700153 s -= 6;
154 c |= (ptr[i] & 0x3f) << s;
155 }
156 fprintf(f, "\\x{%x}", c);
157 return a;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100158}
159#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700160
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100161/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
162Print it with \X instead of \x as an indication. */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700163
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100164#if PCRE2_CODE_UNIT_WIDTH == 16
165if ((ptr[1] & 0xfc00) != 0xdc00)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700166 {
Nick Kralevichf73ff172014-09-27 12:41:49 -0700167 fprintf(f, "\\X{%x}", c);
168 return 0;
169 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100170c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
171fprintf(f, "\\x{%x}", c);
172return 1;
173#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700174
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100175/* For UTF-32 we get here only for a malformed code unit, which should only
176occur if the sanity check has been turned off. Print it with \X instead of \x
177as an indication. */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700178
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100179#if PCRE2_CODE_UNIT_WIDTH == 32
180fprintf(f, "\\X{%x}", c);
181return 0;
182#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
183#endif /* SUPPORT_UNICODE */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700184}
185
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100186
187
Nick Kralevichf73ff172014-09-27 12:41:49 -0700188/*************************************************
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100189* Print string as a list of code units *
Nick Kralevichf73ff172014-09-27 12:41:49 -0700190*************************************************/
191
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100192/* These take no account of UTF as they always print each individual code unit.
193The string is zero-terminated for print_custring(); the length is given for
194print_custring_bylen().
195
196Arguments:
197 f file to write to
198 ptr point to the string
199 len length for print_custring_bylen()
200
201Returns: nothing
202*/
203
Nick Kralevichf73ff172014-09-27 12:41:49 -0700204static void
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100205print_custring(FILE *f, PCRE2_SPTR ptr)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700206{
207while (*ptr != '\0')
208 {
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700209 uint32_t c = *ptr++;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700210 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
211 }
212}
213
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100214static void
215print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
216{
Janis Danisevskis8b979b22016-08-15 16:09:16 +0100217for (; len > 0; len--)
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100218 {
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700219 uint32_t c = *ptr++;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100220 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
221 }
222}
223
224
225
/*************************************************
*           Find Unicode property name           *
*************************************************/

/* Look up the name of a Unicode property from its type and value. Without
UTF/UCP support the table of names does not exist; this function should never
be called in such a build, because a pattern that uses Unicode properties will
not compile, but to avoid scattering #ifdefs through the main code the one
conditional lives here and the function just returns "??".

The table holds both full names and abbreviations, so some fiddling is done to
favour the full name. The table is searched from the end. A 3-character script
name is taken at once; otherwise the longer of the first two matching names is
used. A request for PT_SC also matches entries of type PT_SCX.

Arguments:
  ptype       the property type
  pvalue      the property value

Returns:      the name, or "??" if nothing matches
*/

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
const char *best = "??";
size_t bestlen = 0;
int found = 0;
unsigned int alt_type = (ptype == PT_SC)? PT_SCX : ptype;
int idx = PRIV(utt_size) - 1;

while (idx >= 0)
  {
  const ucp_type_table *entry = PRIV(utt) + idx;
  const char *name;
  size_t namelen;

  idx--;

  /* Skip entries that are for a different property. */

  if (pvalue != entry->value) continue;
  if (entry->type != ptype && entry->type != alt_type) continue;

  name = PRIV(utt_names) + entry->name_offset;
  namelen = strlen(name);

  /* A 3-character script name is always the one that is wanted. */

  if (namelen == 3 && (entry->type == PT_SC || entry->type == PT_SCX))
    return name;

  /* Otherwise remember the longest name seen, and stop after two matches. */

  if (namelen > bestlen)
    {
    best = name;
    bestlen = namelen;
    }

  if (++found >= 2) break;
  }

return best;

#else   /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif  /* SUPPORT_UNICODE */
}
281
282
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100283
Nick Kralevichf73ff172014-09-27 12:41:49 -0700284/*************************************************
285* Print Unicode property value *
286*************************************************/
287
288/* "Normal" properties can be printed from tables. The PT_CLIST property is a
289pseudo-property that contains a pointer to a list of case-equivalent
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100290characters.
291
292Arguments:
293 f file to write to
294 code pointer in the compiled code
295 before text to print before
296 after text to print after
297
298Returns: nothing
299*/
Nick Kralevichf73ff172014-09-27 12:41:49 -0700300
301static void
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100302print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700303{
304if (code[1] != PT_CLIST)
305 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -0700306 const char *sc = (code[1] == PT_SC)? "script:" : "";
307 const char *s = get_ucpname(code[1], code[2]);
308 fprintf(f, "%s%s %s%c%s%s", before, OP_names[*code], sc, toupper(s[0]), s+1, after);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700309 }
310else
311 {
312 const char *not = (*code == OP_PROP)? "" : "not ";
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100313 const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700314 fprintf (f, "%s%sclist", before, not);
315 while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
316 fprintf(f, "%s", after);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700317 }
318}
319
320
321
/*************************************************
*             Print compiled pattern             *
*************************************************/

/* Walk the compiled code of a pattern from its start and write a readable
listing of it to a file, normally one line per item (a run of consecutive
OP_CHAR or OP_CHARI items shares a line). The listing ends, followed by a line
of hyphens, when OP_END is reached.

The print_lengths flag controls whether offsets and lengths of items are
printed. Lengths can be turned off from pcre2test so that automatic tests on
bytecode can be written that do not depend on the value of LINK_SIZE.

Arguments:
  re              a compiled pattern
  f               the file to write to
  print_lengths   show various lengths

Returns:          nothing
*/

static void
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
{
PCRE2_SPTR codestart, nametable, code;
uint32_t nesize = re->name_entry_size;
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;

/* The name table starts immediately after the fixed-size header, and the
compiled code starts immediately after the name table. */

nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
code = codestart = nametable + re->name_count * re->name_entry_size;

/* Each pass round this loop deals with the item at "code". A case that ends
with "break" relies on the common code at the bottom of the loop to step over
the item and finish the output line; a case that ends with "continue" has
already done both for itself. */

for(;;)
  {
  PCRE2_SPTR ccode;
  uint32_t c;
  int i;
  const char *flag = " ";        /* Replaced by "/i" or "/m" where relevant */
  unsigned int extra = 0;        /* Code units beyond the fixed item length */

  /* Start the line with the offset of the item, or padding in its place. */

  if (print_lengths)
    fprintf(f, "%3d ", (int)(code - codestart));
  else
    fprintf(f, " ");

  switch(*code)
    {
/* ========================================================================== */
    /* These cases are never obeyed. This is a fudge that causes a compile-
    time error if the vectors OP_names or OP_lengths, which are indexed
    by opcode, are not the correct length. It seems to be the only way to do
    such a check at compile time, as the sizeof() operator does not work in
    the C preprocessor. (If either length is wrong, the second case label
    evaluates to the same value as the first, giving a duplicate case.) */

    case OP_TABLE_LENGTH:
    case OP_TABLE_LENGTH +
      ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
      (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
    return;
/* ========================================================================== */

    /* End of the compiled code: print the closing rule and finish. */

    case OP_END:
    fprintf(f, " %s\n", OP_names[*code]);
    fprintf(f, "------------------------------------------------------------------\n");
    return;

    /* Consecutive literal characters are gathered onto a single line. Each
    time round, step over the opcode, then over the character itself, which
    is one code unit plus whatever print_char() reports as additional. */

    case OP_CHAR:
    fprintf(f, " ");
    do
      {
      code++;
      code += 1 + print_char(f, code, utf);
      }
    while (*code == OP_CHAR);
    fprintf(f, "\n");
    continue;

    /* The same, for caseless literal characters. */

    case OP_CHARI:
    fprintf(f, " /i ");
    do
      {
      code++;
      code += 1 + print_char(f, code, utf);
      }
    while (*code == OP_CHARI);
    fprintf(f, "\n");
    continue;

    /* Capturing brackets: the link value (if lengths are wanted), then the
    opcode name and the 2-byte value that follows the link. */

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
      else fprintf(f, " ");
    fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
    break;

    /* Items that consist of an opcode followed by a link: print the link
    value (if lengths are wanted) and the opcode name. */

    case OP_BRA:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_KETRMAX:
    case OP_KETRMIN:
    case OP_KETRPOS:
    case OP_ALT:
    case OP_KET:
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_COND:
    case OP_SCOND:
    case OP_REVERSE:
    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
      else fprintf(f, " ");
    fprintf(f, "%s", OP_names[*code]);
    break;

    /* Opcode name followed by its 2-byte argument. */

    case OP_CLOSE:
    fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
    break;

    /* Here the 2-byte argument is printed in front of the opcode name. */

    case OP_CREF:
    fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
    break;

    /* A condition that refers to a group by name. The first 2-byte argument
    selects a name table entry; the name text starts IMM2_SIZE code units into
    the entry. The second 2-byte argument is printed after the name. */

    case OP_DNCREF:
      {
      PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
      fprintf(f, " %s Cond ref <", flag);
      print_custring(f, entry);
      fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
      }
    break;

    /* A recursion condition; the value RREF_ANY is shown as "any". */

    case OP_RREF:
    c = GET2(code, 1);
    if (c == RREF_ANY)
      fprintf(f, " Cond recurse any");
    else
      fprintf(f, " Cond recurse %d", c);
    break;

    /* A recursion condition that refers to a group by name; the name is
    located in the name table in the same way as for OP_DNCREF. */

    case OP_DNRREF:
      {
      PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
      fprintf(f, " %s Cond recurse <", flag);
      print_custring(f, entry);
      fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
      }
    break;

    case OP_FALSE:
    fprintf(f, " Cond false");
    break;

    case OP_TRUE:
    fprintf(f, " Cond true");
    break;

    /* Single-item repeats with no count. The caseless variants just set the
    flag and then share the code for the caseful ones. */

    case OP_STARI:
    case OP_MINSTARI:
    case OP_POSSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    flag = "/i";
    /* Fall through */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_POSSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEPOSPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEPOSQUERY:
    fprintf(f, " %s ", flag);

    /* For opcodes from OP_TYPESTAR upwards the repeated item is an opcode in
    code[1] (a property item needs two more code units for its type and
    value); for the others it is a literal character. */

    if (*code >= OP_TYPESTAR)
      {
      if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
        {
        print_prop(f, code + 1, "", " ");
        extra = 2;
        }
      else fprintf(f, "%s", OP_names[code[1]]);
      }
    else extra = print_char(f, code+1, utf);
    fprintf(f, "%s", OP_names[*code]);
    break;

    /* Counted repeats of a literal character. The 2-byte count comes first
    and the character follows it. Everything except an exact repeat is shown
    as {0,n}, with ? or + appended for the minimizing and possessive forms. */

    case OP_EXACTI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_POSUPTOI:
    flag = "/i";
    /* Fall through */
    case OP_EXACT:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_POSUPTO:
    fprintf(f, " %s ", flag);
    extra = print_char(f, code + 1 + IMM2_SIZE, utf);
    fprintf(f, "{");
    if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
      else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
    break;

    /* Counted repeats of an item that is given by an opcode, which follows
    the 2-byte count. A property item needs two more code units. */

    case OP_TYPEEXACT:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEPOSUPTO:
    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
      {
      print_prop(f, code + IMM2_SIZE + 1, " ", " ");
      extra = 2;
      }
    else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
    fprintf(f, "{");
    if (*code != OP_TYPEEXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
      else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
    break;

    /* A single negated character, shown as [^x]. */

    case OP_NOTI:
    flag = "/i";
    /* Fall through */
    case OP_NOT:
    fprintf(f, " %s [^", flag);
    extra = print_char(f, code + 1, utf);
    fprintf(f, "]");
    break;

    /* Uncounted repeats of a negated character: [^x] followed by the opcode
    name. */

    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPOSSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTPOSPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTPOSQUERYI:
    flag = "/i";
    /* Fall through */

    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPOSSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTPOSPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTPOSQUERY:
    fprintf(f, " %s [^", flag);
    extra = print_char(f, code + 1, utf);
    fprintf(f, "]%s", OP_names[*code]);
    break;

    /* Counted repeats of a negated character; the layout and the way the
    count is shown are the same as for the non-negated counted repeats. */

    case OP_NOTEXACTI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTPOSUPTOI:
    flag = "/i";
    /* Fall through */

    case OP_NOTEXACT:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTPOSUPTO:
    fprintf(f, " %s [^", flag);
    extra = print_char(f, code + 1 + IMM2_SIZE, utf);
    fprintf(f, "]{");
    if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
      else
    if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
    break;

    /* Recursion: the link value (if lengths are wanted) and the name. */

    case OP_RECURSE:
    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
      else fprintf(f, " ");
    fprintf(f, "%s", OP_names[*code]);
    break;

    /* Back reference by number. A repeat opcode may follow the item, so set
    ccode to the next item and jump to the code, shared with classes, that
    prints such a repeat. */

    case OP_REFI:
    flag = "/i";
    /* Fall through */
    case OP_REF:
    fprintf(f, " %s \\%d", flag, GET2(code,1));
    ccode = code + OP_lengths[*code];
    goto CLASS_REF_REPEAT;

    /* Back reference by name; the name is located in the name table in the
    same way as for OP_DNCREF. A repeat may follow, as for OP_REF. */

    case OP_DNREFI:
    flag = "/i";
    /* Fall through */
    case OP_DNREF:
      {
      PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
      fprintf(f, " %s \\k<", flag);
      print_custring(f, entry);
      fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
      }
    ccode = code + OP_lengths[*code];
    goto CLASS_REF_REPEAT;

    /* Callout with a numerical argument: the code unit that follows the two
    link-sized fields is printed first, then the two link-sized values. */

    case OP_CALLOUT:
    fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
      GET(code, 1), GET(code, 1 + LINK_SIZE));
    break;

    /* Callout with a string argument. The item has four link-sized fields,
    the third of which is the length of the whole item; it is put in "extra"
    so that the common code steps over the entire item. The code unit after
    the fields is the opening delimiter, and the string follows it. After the
    string, the matching closing delimiter is found by looking the opening one
    up in the parallel tables of start and end delimiters. */

    case OP_CALLOUT_STR:
    c = code[1 + 4*LINK_SIZE];
    fprintf(f, " %s %c", OP_names[*code], c);
    extra = GET(code, 1 + 2*LINK_SIZE);
    print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
    for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
      if (c == PRIV(callout_start_delims)[i])
        {
        c = PRIV(callout_end_delims)[i];
        break;
        }
    fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
      GET(code, 1 + LINK_SIZE));
    break;

    /* A property test that stands on its own. */

    case OP_PROP:
    case OP_NOTPROP:
    print_prop(f, code, " ", "");
    break;

    /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
    in having this code always here, and it makes it less messy without all
    those #ifdefs. */

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:
      {
      unsigned int min, max;
      BOOL printmap;
      BOOL invertmap = FALSE;
      uint8_t *map;
      uint8_t inverted_map[32];

      fprintf(f, " [");

      /* For an XCLASS the link field holds the length of the whole item,
      and is followed by a code unit of flag bits, which say whether a bit map
      is present and whether the class is negated. A negated class has its map
      inverted for printing unless the XCL_HASPROP flag is set. */

      if (*code == OP_XCLASS)
        {
        extra = GET(code, 1);
        ccode = code + LINK_SIZE + 1;
        printmap = (*ccode & XCL_MAP) != 0;
        if ((*ccode & XCL_NOT) != 0)
          {
          invertmap = (*ccode & XCL_HASPROP) == 0;
          fprintf(f, "^");
          }
        ccode++;
        }

      /* The other class opcodes are followed directly by a bit map. */

      else
        {
        printmap = TRUE;
        ccode = code + 1;
        }

      /* Print a bit map */

      if (printmap)
        {
        map = (uint8_t *)ccode;
        if (invertmap)
          {
          /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
          for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
          map = inverted_map;
          }

        /* Scan the 256 bits, printing each run of consecutive set bits as a
        single character, a pair of characters, or a hyphenated range. */

        for (i = 0; i < 256; i++)
          {
          if ((map[i/8] & (1u << (i&7))) != 0)
            {
            int j;

            /* Find the first unset bit after i, if any. */

            for (j = i+1; j < 256; j++)
              if ((map[j/8] & (1u << (j&7))) == 0) break;
            if (i == '-' || i == ']') fprintf(f, "\\");
            if (PRINTABLE(i)) fprintf(f, "%c", i);
              else fprintf(f, "\\x%02x", i);

            /* Step j back to the last set bit of the run. If the run has
            more than one member, print that last one, preceded by a hyphen
            unless the run has exactly two members. */

            if (--j > i)
              {
              if (j != i + 1) fprintf(f, "-");
              if (j == '-' || j == ']') fprintf(f, "\\");
              if (PRINTABLE(j)) fprintf(f, "%c", j);
                else fprintf(f, "\\x%02x", j);
              }

            /* Carry on scanning after the end of this run. */

            i = j;
            }
          }

        /* Step over the 32-byte map, counted in code units. */

        ccode += 32 / sizeof(PCRE2_UCHAR);
        }

      /* For an XCLASS there is always some additional data */

      if (*code == OP_XCLASS)
        {
        PCRE2_UCHAR ch;

        /* Each entry starts with a type code unit; XCL_END ends the list. */

        while ((ch = *ccode++) != XCL_END)
          {
          BOOL not = FALSE;
          const char *notch = "";

          switch(ch)
            {
            case XCL_NOTPROP:
            not = TRUE;
            notch = "^";
            /* Fall through */

            /* A property entry is followed by a property type and value.
            Three of the types are printed as POSIX class names; any other is
            printed as \p{..} or \P{..} using the property's name. */

            case XCL_PROP:
              {
              unsigned int ptype = *ccode++;
              unsigned int pvalue = *ccode++;
              const char *s;

              switch(ptype)
                {
                case PT_PXGRAPH:
                fprintf(f, "[:%sgraph:]", notch);
                break;

                case PT_PXPRINT:
                fprintf(f, "[:%sprint:]", notch);
                break;

                case PT_PXPUNCT:
                fprintf(f, "[:%spunct:]", notch);
                break;

                default:
                s = get_ucpname(ptype, pvalue);
                fprintf(f, "\\%c{%c%s}", (not? 'P':'p'), toupper(s[0]), s+1);
                break;
                }
              }
            break;

            /* Any other entry is a single character, or for XCL_RANGE a pair
            of characters that are printed with a hyphen between them. */

            default:
            ccode += 1 + print_char(f, ccode, utf);
            if (ch == XCL_RANGE)
              {
              fprintf(f, "-");
              ccode += 1 + print_char(f, ccode, utf);
              }
            break;
            }
          }
        }

      /* Indicate a non-UTF class which was created by negation */

      fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");

      /* Handle repeats after a class or a back reference. The back reference
      cases jump to this label with ccode pointing to the item that follows
      the reference. When there is a repeat, its length is added to "extra" so
      that it is stepped over along with the class or reference. */

      CLASS_REF_REPEAT:
      switch(*ccode)
        {
        case OP_CRSTAR:
        case OP_CRMINSTAR:
        case OP_CRPLUS:
        case OP_CRMINPLUS:
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        case OP_CRPOSSTAR:
        case OP_CRPOSPLUS:
        case OP_CRPOSQUERY:
        fprintf(f, "%s", OP_names[*ccode]);
        extra += OP_lengths[*ccode];
        break;

        /* A range repeat has two 2-byte values, the minimum and the maximum;
        a maximum of zero is printed as an open-ended range. */

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        case OP_CRPOSRANGE:
        min = GET2(ccode,1);
        max = GET2(ccode,1 + IMM2_SIZE);
        if (max == 0) fprintf(f, "{%u,}", min);
        else fprintf(f, "{%u,%u}", min, max);
        if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
        else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
        extra += OP_lengths[*ccode];
        break;

        /* Do nothing if it's not a repeat; this code stops picky compilers
        warning about the lack of a default code path. */

        default:
        break;
        }
      }
    break;

    /* Verbs with a string argument: code[1] is the length of the string,
    which starts at code[2]. The string length is added to "extra" because it
    is not part of the fixed length of the item. */

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_SKIP_ARG:
    case OP_THEN_ARG:
    fprintf(f, " %s ", OP_names[*code]);
    print_custring_bylen(f, code + 2, code[1]);
    extra += code[1];
    break;

    case OP_THEN:
    fprintf(f, " %s", OP_names[*code]);
    break;

    /* The multiline forms of circumflex and dollar are printed like any
    other data-free item, but with a flag. */

    case OP_CIRCM:
    case OP_DOLLM:
    flag = "/m";
    /* Fall through */

    /* Anything else is just an item with no data, but possibly a flag. */

    default:
    fprintf(f, " %s %s", flag, OP_names[*code]);
    break;
    }

  /* Step over the item just printed: its fixed length from the table plus any
  variable part that was noted in "extra". Then finish the output line. */

  code += OP_lengths[*code] + extra;
  fprintf(f, "\n");
  }
}
867
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100868/* End of pcre2_printint.c */