blob: b9bab025ab38fce379683a00ef9a137b6f7dad86 [file] [log] [blame]
Nick Kralevichf73ff172014-09-27 12:41:49 -07001/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
Janis Danisevskis53e448c2016-03-31 13:35:25 +01009 Original API code Copyright (c) 1997-2012 University of Cambridge
Elliott Hughes0c26e192019-08-07 12:24:46 -070010 New API code Copyright (c) 2016-2019 University of Cambridge
Nick Kralevichf73ff172014-09-27 12:41:49 -070011
12-----------------------------------------------------------------------------
13Redistribution and use in source and binary forms, with or without
14modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37POSSIBILITY OF SUCH DAMAGE.
38-----------------------------------------------------------------------------
39*/
40
41
42/* This module contains a PCRE private debugging function for printing out the
43internal form of a compiled regular expression, along with some supporting
Janis Danisevskis53e448c2016-03-31 13:35:25 +010044local functions. This source file is #included in pcre2test.c at each supported
45code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
46that comprise the library. It can also optionally be included in
47pcre2_compile.c for detailed debugging in error situations. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070048
Nick Kralevichf73ff172014-09-27 12:41:49 -070049
Janis Danisevskis53e448c2016-03-31 13:35:25 +010050/* Tables of operator names. The same 8-bit table is used for all code unit
51widths, so it must be defined only once. The list itself is defined in
52pcre2_internal.h, which is #included by pcre2test before this file. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070053
Janis Danisevskis53e448c2016-03-31 13:35:25 +010054#ifndef OP_LISTS_DEFINED
55static const char *OP_names[] = { OP_NAME_LIST };
56#define OP_LISTS_DEFINED
Nick Kralevichf73ff172014-09-27 12:41:49 -070057#endif
58
Janis Danisevskis53e448c2016-03-31 13:35:25 +010059/* The functions and tables herein must all have mode-dependent names. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070060
Janis Danisevskis53e448c2016-03-31 13:35:25 +010061#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
62#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
63#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
64#define print_char PCRE2_SUFFIX(print_char_)
65#define print_custring PCRE2_SUFFIX(print_custring_)
66#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
67#define print_prop PCRE2_SUFFIX(print_prop_)
Nick Kralevichf73ff172014-09-27 12:41:49 -070068
Janis Danisevskis53e448c2016-03-31 13:35:25 +010069/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
70the definition is next to the definition of the opcodes in pcre2_internal.h.
71The contents of the table are, however, mode-dependent. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070072
Janis Danisevskis53e448c2016-03-31 13:35:25 +010073static const uint8_t OP_lengths[] = { OP_LENGTHS };
Nick Kralevichf73ff172014-09-27 12:41:49 -070074
75
76
77/*************************************************
Janis Danisevskis53e448c2016-03-31 13:35:25 +010078* Print one character from a string *
Nick Kralevichf73ff172014-09-27 12:41:49 -070079*************************************************/
80
Janis Danisevskis53e448c2016-03-31 13:35:25 +010081/* In UTF mode the character may occupy more than one code unit.
82
83Arguments:
84 f file to write to
85 ptr pointer to first code unit of the character
86 utf TRUE if string is UTF (will be FALSE if UTF is not supported)
87
88Returns: number of additional code units used
89*/
90
Nick Kralevichf73ff172014-09-27 12:41:49 -070091static unsigned int
Janis Danisevskis53e448c2016-03-31 13:35:25 +010092print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
Nick Kralevichf73ff172014-09-27 12:41:49 -070093{
Janis Danisevskis53e448c2016-03-31 13:35:25 +010094uint32_t c = *ptr;
95BOOL one_code_unit = !utf;
Nick Kralevichf73ff172014-09-27 12:41:49 -070096
Janis Danisevskis53e448c2016-03-31 13:35:25 +010097/* If UTF is supported and requested, check for a valid single code unit. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070098
Janis Danisevskis53e448c2016-03-31 13:35:25 +010099#ifdef SUPPORT_UNICODE
100if (utf)
101 {
102#if PCRE2_CODE_UNIT_WIDTH == 8
103 one_code_unit = c < 0x80;
104#elif PCRE2_CODE_UNIT_WIDTH == 16
105 one_code_unit = (c & 0xfc00) != 0xd800;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700106#else
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100107 one_code_unit = (c & 0xfffff800u) != 0xd800u;
108#endif /* CODE_UNIT_WIDTH */
109 }
110#endif /* SUPPORT_UNICODE */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700111
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100112/* Handle a valid one-code-unit character at any width. */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700113
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100114if (one_code_unit)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700115 {
116 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
117 else if (c < 0x80) fprintf(f, "\\x%02x", c);
118 else fprintf(f, "\\x{%02x}", c);
119 return 0;
120 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100121
122/* Code for invalid UTF code units and multi-unit UTF characters is different
123for each width. If UTF is not supported, control should never get here, but we
124need a return statement to keep the compiler happy. */
125
126#ifndef SUPPORT_UNICODE
127return 0;
128#else
129
130/* Malformed UTF-8 should occur only if the sanity check has been turned off.
131Rather than swallow random bytes, just stop if we hit a bad one. Print it with
132\X instead of \x as an indication. */
133
134#if PCRE2_CODE_UNIT_WIDTH == 8
135if ((c & 0xc0) != 0xc0)
136 {
137 fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
138 return 0;
139 }
Nick Kralevichf73ff172014-09-27 12:41:49 -0700140else
141 {
142 int i;
143 int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
144 int s = 6*a;
145 c = (c & PRIV(utf8_table3)[a]) << s;
146 for (i = 1; i <= a; i++)
147 {
Nick Kralevichf73ff172014-09-27 12:41:49 -0700148 if ((ptr[i] & 0xc0) != 0x80)
149 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100150 fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700151 return i - 1;
152 }
Nick Kralevichf73ff172014-09-27 12:41:49 -0700153 s -= 6;
154 c |= (ptr[i] & 0x3f) << s;
155 }
156 fprintf(f, "\\x{%x}", c);
157 return a;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100158}
159#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700160
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100161/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
162Print it with \X instead of \x as an indication. */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700163
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100164#if PCRE2_CODE_UNIT_WIDTH == 16
165if ((ptr[1] & 0xfc00) != 0xdc00)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700166 {
Nick Kralevichf73ff172014-09-27 12:41:49 -0700167 fprintf(f, "\\X{%x}", c);
168 return 0;
169 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100170c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
171fprintf(f, "\\x{%x}", c);
172return 1;
173#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700174
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100175/* For UTF-32 we get here only for a malformed code unit, which should only
176occur if the sanity check has been turned off. Print it with \X instead of \x
177as an indication. */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700178
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100179#if PCRE2_CODE_UNIT_WIDTH == 32
180fprintf(f, "\\X{%x}", c);
181return 0;
182#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
183#endif /* SUPPORT_UNICODE */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700184}
185
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100186
187
Nick Kralevichf73ff172014-09-27 12:41:49 -0700188/*************************************************
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100189* Print string as a list of code units *
Nick Kralevichf73ff172014-09-27 12:41:49 -0700190*************************************************/
191
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100192/* These take no account of UTF as they always print each individual code unit.
193The string is zero-terminated for print_custring(); the length is given for
194print_custring_bylen().
195
196Arguments:
197 f file to write to
  ptr         pointer to the string
199 len length for print_custring_bylen()
200
201Returns: nothing
202*/
203
Nick Kralevichf73ff172014-09-27 12:41:49 -0700204static void
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100205print_custring(FILE *f, PCRE2_SPTR ptr)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700206{
207while (*ptr != '\0')
208 {
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700209 uint32_t c = *ptr++;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700210 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
211 }
212}
213
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100214static void
215print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
216{
Janis Danisevskis8b979b22016-08-15 16:09:16 +0100217for (; len > 0; len--)
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100218 {
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700219 uint32_t c = *ptr++;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100220 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
221 }
222}
223
224
225
Nick Kralevichf73ff172014-09-27 12:41:49 -0700226/*************************************************
227* Find Unicode property name *
228*************************************************/
229
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100230/* When there is no UTF/UCP support, the table of names does not exist. This
231function should not be called in such configurations, because a pattern that
232tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
233into the main code, however, we just put one into this function. */
234
/* Look up the name of the property with the given type and value. The table
is scanned from its last entry towards its first; "??" is returned when no
entry matches or when there is no Unicode support. */

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
int idx = PRIV(utt_size) - 1;
while (idx >= 0)
  {
  if (PRIV(utt)[idx].type == ptype && PRIV(utt)[idx].value == pvalue)
    return PRIV(utt_names) + PRIV(utt)[idx].name_offset;
  idx--;
  }
return "??";
#else  /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif /* SUPPORT_UNICODE */
}
251
252
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100253
Nick Kralevichf73ff172014-09-27 12:41:49 -0700254/*************************************************
255* Print Unicode property value *
256*************************************************/
257
258/* "Normal" properties can be printed from tables. The PT_CLIST property is a
259pseudo-property that contains a pointer to a list of case-equivalent
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100260characters.
261
262Arguments:
263 f file to write to
264 code pointer in the compiled code
265 before text to print before
266 after text to print after
267
268Returns: nothing
269*/
Nick Kralevichf73ff172014-09-27 12:41:49 -0700270
271static void
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100272print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700273{
274if (code[1] != PT_CLIST)
275 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100276 fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
Nick Kralevichf73ff172014-09-27 12:41:49 -0700277 code[2]), after);
278 }
279else
280 {
281 const char *not = (*code == OP_PROP)? "" : "not ";
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100282 const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700283 fprintf (f, "%s%sclist", before, not);
284 while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
285 fprintf(f, "%s", after);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700286 }
287}
288
289
290
Nick Kralevichf73ff172014-09-27 12:41:49 -0700291/*************************************************
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100292* Print compiled pattern *
Nick Kralevichf73ff172014-09-27 12:41:49 -0700293*************************************************/
294
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100295/* The print_lengths flag controls whether offsets and lengths of items are
printed. Lengths can be turned off from pcre2test so that automatic tests on
297bytecode can be written that do not depend on the value of LINK_SIZE.
Nick Kralevichf73ff172014-09-27 12:41:49 -0700298
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100299Arguments:
300 re a compiled pattern
301 f the file to write to
302 print_lengths show various lengths
303
304Returns: nothing
305*/
306
307static void
308pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700309{
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100310PCRE2_SPTR codestart, nametable, code;
311uint32_t nesize = re->name_entry_size;
312BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700313
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100314nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
315code = codestart = nametable + re->name_count * re->name_entry_size;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700316
317for(;;)
318 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100319 PCRE2_SPTR ccode;
320 uint32_t c;
321 int i;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700322 const char *flag = " ";
Nick Kralevichf73ff172014-09-27 12:41:49 -0700323 unsigned int extra = 0;
324
325 if (print_lengths)
326 fprintf(f, "%3d ", (int)(code - codestart));
327 else
328 fprintf(f, " ");
329
330 switch(*code)
331 {
332/* ========================================================================== */
333 /* These cases are never obeyed. This is a fudge that causes a compile-
334 time error if the vectors OP_names or OP_lengths, which are indexed
335 by opcode, are not the correct length. It seems to be the only way to do
336 such a check at compile time, as the sizeof() operator does not work in
337 the C preprocessor. */
338
339 case OP_TABLE_LENGTH:
340 case OP_TABLE_LENGTH +
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100341 ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
342 (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700343 return;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700344/* ========================================================================== */
345
346 case OP_END:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100347 fprintf(f, " %s\n", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700348 fprintf(f, "------------------------------------------------------------------\n");
349 return;
350
351 case OP_CHAR:
352 fprintf(f, " ");
353 do
354 {
355 code++;
356 code += 1 + print_char(f, code, utf);
357 }
358 while (*code == OP_CHAR);
359 fprintf(f, "\n");
360 continue;
361
362 case OP_CHARI:
363 fprintf(f, " /i ");
364 do
365 {
366 code++;
367 code += 1 + print_char(f, code, utf);
368 }
369 while (*code == OP_CHARI);
370 fprintf(f, "\n");
371 continue;
372
373 case OP_CBRA:
374 case OP_CBRAPOS:
375 case OP_SCBRA:
376 case OP_SCBRAPOS:
377 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
378 else fprintf(f, " ");
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100379 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
Nick Kralevichf73ff172014-09-27 12:41:49 -0700380 break;
381
382 case OP_BRA:
383 case OP_BRAPOS:
384 case OP_SBRA:
385 case OP_SBRAPOS:
386 case OP_KETRMAX:
387 case OP_KETRMIN:
388 case OP_KETRPOS:
389 case OP_ALT:
390 case OP_KET:
391 case OP_ASSERT:
392 case OP_ASSERT_NOT:
393 case OP_ASSERTBACK:
394 case OP_ASSERTBACK_NOT:
Elliott Hughes2dbd7d22020-06-03 14:32:37 -0700395 case OP_ASSERT_NA:
396 case OP_ASSERTBACK_NA:
Nick Kralevichf73ff172014-09-27 12:41:49 -0700397 case OP_ONCE:
Elliott Hughes0c26e192019-08-07 12:24:46 -0700398 case OP_SCRIPT_RUN:
Nick Kralevichf73ff172014-09-27 12:41:49 -0700399 case OP_COND:
400 case OP_SCOND:
401 case OP_REVERSE:
402 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
403 else fprintf(f, " ");
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100404 fprintf(f, "%s", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700405 break;
406
407 case OP_CLOSE:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100408 fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
Nick Kralevichf73ff172014-09-27 12:41:49 -0700409 break;
410
411 case OP_CREF:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100412 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700413 break;
414
415 case OP_DNCREF:
416 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100417 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700418 fprintf(f, " %s Cond ref <", flag);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100419 print_custring(f, entry);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700420 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
421 }
422 break;
423
424 case OP_RREF:
425 c = GET2(code, 1);
426 if (c == RREF_ANY)
427 fprintf(f, " Cond recurse any");
428 else
429 fprintf(f, " Cond recurse %d", c);
430 break;
431
432 case OP_DNRREF:
433 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100434 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700435 fprintf(f, " %s Cond recurse <", flag);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100436 print_custring(f, entry);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700437 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
438 }
439 break;
440
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100441 case OP_FALSE:
442 fprintf(f, " Cond false");
443 break;
444
445 case OP_TRUE:
446 fprintf(f, " Cond true");
Nick Kralevichf73ff172014-09-27 12:41:49 -0700447 break;
448
449 case OP_STARI:
450 case OP_MINSTARI:
451 case OP_POSSTARI:
452 case OP_PLUSI:
453 case OP_MINPLUSI:
454 case OP_POSPLUSI:
455 case OP_QUERYI:
456 case OP_MINQUERYI:
457 case OP_POSQUERYI:
458 flag = "/i";
459 /* Fall through */
460 case OP_STAR:
461 case OP_MINSTAR:
462 case OP_POSSTAR:
463 case OP_PLUS:
464 case OP_MINPLUS:
465 case OP_POSPLUS:
466 case OP_QUERY:
467 case OP_MINQUERY:
468 case OP_POSQUERY:
469 case OP_TYPESTAR:
470 case OP_TYPEMINSTAR:
471 case OP_TYPEPOSSTAR:
472 case OP_TYPEPLUS:
473 case OP_TYPEMINPLUS:
474 case OP_TYPEPOSPLUS:
475 case OP_TYPEQUERY:
476 case OP_TYPEMINQUERY:
477 case OP_TYPEPOSQUERY:
478 fprintf(f, " %s ", flag);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100479
Nick Kralevichf73ff172014-09-27 12:41:49 -0700480 if (*code >= OP_TYPESTAR)
481 {
482 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
483 {
484 print_prop(f, code + 1, "", " ");
485 extra = 2;
486 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100487 else fprintf(f, "%s", OP_names[code[1]]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700488 }
489 else extra = print_char(f, code+1, utf);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100490 fprintf(f, "%s", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700491 break;
492
493 case OP_EXACTI:
494 case OP_UPTOI:
495 case OP_MINUPTOI:
496 case OP_POSUPTOI:
497 flag = "/i";
498 /* Fall through */
499 case OP_EXACT:
500 case OP_UPTO:
501 case OP_MINUPTO:
502 case OP_POSUPTO:
503 fprintf(f, " %s ", flag);
504 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
505 fprintf(f, "{");
506 if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
507 fprintf(f, "%d}", GET2(code,1));
508 if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
509 else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
510 break;
511
512 case OP_TYPEEXACT:
513 case OP_TYPEUPTO:
514 case OP_TYPEMINUPTO:
515 case OP_TYPEPOSUPTO:
516 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
517 {
518 print_prop(f, code + IMM2_SIZE + 1, " ", " ");
519 extra = 2;
520 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100521 else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700522 fprintf(f, "{");
523 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
524 fprintf(f, "%d}", GET2(code,1));
525 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
526 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
527 break;
528
529 case OP_NOTI:
530 flag = "/i";
531 /* Fall through */
532 case OP_NOT:
533 fprintf(f, " %s [^", flag);
534 extra = print_char(f, code + 1, utf);
535 fprintf(f, "]");
536 break;
537
538 case OP_NOTSTARI:
539 case OP_NOTMINSTARI:
540 case OP_NOTPOSSTARI:
541 case OP_NOTPLUSI:
542 case OP_NOTMINPLUSI:
543 case OP_NOTPOSPLUSI:
544 case OP_NOTQUERYI:
545 case OP_NOTMINQUERYI:
546 case OP_NOTPOSQUERYI:
547 flag = "/i";
548 /* Fall through */
549
550 case OP_NOTSTAR:
551 case OP_NOTMINSTAR:
552 case OP_NOTPOSSTAR:
553 case OP_NOTPLUS:
554 case OP_NOTMINPLUS:
555 case OP_NOTPOSPLUS:
556 case OP_NOTQUERY:
557 case OP_NOTMINQUERY:
558 case OP_NOTPOSQUERY:
559 fprintf(f, " %s [^", flag);
560 extra = print_char(f, code + 1, utf);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100561 fprintf(f, "]%s", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700562 break;
563
564 case OP_NOTEXACTI:
565 case OP_NOTUPTOI:
566 case OP_NOTMINUPTOI:
567 case OP_NOTPOSUPTOI:
568 flag = "/i";
569 /* Fall through */
570
571 case OP_NOTEXACT:
572 case OP_NOTUPTO:
573 case OP_NOTMINUPTO:
574 case OP_NOTPOSUPTO:
575 fprintf(f, " %s [^", flag);
576 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
577 fprintf(f, "]{");
578 if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
579 fprintf(f, "%d}", GET2(code,1));
580 if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
581 else
582 if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
583 break;
584
585 case OP_RECURSE:
586 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
587 else fprintf(f, " ");
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100588 fprintf(f, "%s", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700589 break;
590
591 case OP_REFI:
592 flag = "/i";
593 /* Fall through */
594 case OP_REF:
595 fprintf(f, " %s \\%d", flag, GET2(code,1));
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100596 ccode = code + OP_lengths[*code];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700597 goto CLASS_REF_REPEAT;
598
599 case OP_DNREFI:
600 flag = "/i";
601 /* Fall through */
602 case OP_DNREF:
603 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100604 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700605 fprintf(f, " %s \\k<", flag);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100606 print_custring(f, entry);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700607 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
608 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100609 ccode = code + OP_lengths[*code];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700610 goto CLASS_REF_REPEAT;
611
612 case OP_CALLOUT:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100613 fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
614 GET(code, 1), GET(code, 1 + LINK_SIZE));
615 break;
616
617 case OP_CALLOUT_STR:
618 c = code[1 + 4*LINK_SIZE];
619 fprintf(f, " %s %c", OP_names[*code], c);
620 extra = GET(code, 1 + 2*LINK_SIZE);
621 print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
622 for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
623 if (c == PRIV(callout_start_delims)[i])
624 {
625 c = PRIV(callout_end_delims)[i];
626 break;
627 }
628 fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
629 GET(code, 1 + LINK_SIZE));
Nick Kralevichf73ff172014-09-27 12:41:49 -0700630 break;
631
632 case OP_PROP:
633 case OP_NOTPROP:
634 print_prop(f, code, " ", "");
635 break;
636
637 /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
638 in having this code always here, and it makes it less messy without all
639 those #ifdefs. */
640
641 case OP_CLASS:
642 case OP_NCLASS:
643 case OP_XCLASS:
644 {
Nick Kralevichf73ff172014-09-27 12:41:49 -0700645 unsigned int min, max;
646 BOOL printmap;
647 BOOL invertmap = FALSE;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100648 uint8_t *map;
649 uint8_t inverted_map[32];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700650
651 fprintf(f, " [");
652
653 if (*code == OP_XCLASS)
654 {
655 extra = GET(code, 1);
656 ccode = code + LINK_SIZE + 1;
657 printmap = (*ccode & XCL_MAP) != 0;
658 if ((*ccode & XCL_NOT) != 0)
659 {
660 invertmap = (*ccode & XCL_HASPROP) == 0;
661 fprintf(f, "^");
662 }
663 ccode++;
664 }
665 else
666 {
667 printmap = TRUE;
668 ccode = code + 1;
669 }
670
671 /* Print a bit map */
672
673 if (printmap)
674 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100675 map = (uint8_t *)ccode;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700676 if (invertmap)
677 {
Elliott Hughes0c26e192019-08-07 12:24:46 -0700678 /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
679 for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700680 map = inverted_map;
681 }
682
683 for (i = 0; i < 256; i++)
684 {
Elliott Hughes0c26e192019-08-07 12:24:46 -0700685 if ((map[i/8] & (1u << (i&7))) != 0)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700686 {
687 int j;
688 for (j = i+1; j < 256; j++)
Elliott Hughes0c26e192019-08-07 12:24:46 -0700689 if ((map[j/8] & (1u << (j&7))) == 0) break;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700690 if (i == '-' || i == ']') fprintf(f, "\\");
691 if (PRINTABLE(i)) fprintf(f, "%c", i);
692 else fprintf(f, "\\x%02x", i);
693 if (--j > i)
694 {
695 if (j != i + 1) fprintf(f, "-");
696 if (j == '-' || j == ']') fprintf(f, "\\");
697 if (PRINTABLE(j)) fprintf(f, "%c", j);
698 else fprintf(f, "\\x%02x", j);
699 }
700 i = j;
701 }
702 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100703 ccode += 32 / sizeof(PCRE2_UCHAR);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700704 }
705
706 /* For an XCLASS there is always some additional data */
707
708 if (*code == OP_XCLASS)
709 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100710 PCRE2_UCHAR ch;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700711 while ((ch = *ccode++) != XCL_END)
712 {
713 BOOL not = FALSE;
714 const char *notch = "";
715
716 switch(ch)
717 {
718 case XCL_NOTPROP:
719 not = TRUE;
720 notch = "^";
721 /* Fall through */
722
723 case XCL_PROP:
724 {
725 unsigned int ptype = *ccode++;
726 unsigned int pvalue = *ccode++;
727
728 switch(ptype)
729 {
730 case PT_PXGRAPH:
731 fprintf(f, "[:%sgraph:]", notch);
732 break;
733
734 case PT_PXPRINT:
735 fprintf(f, "[:%sprint:]", notch);
736 break;
737
738 case PT_PXPUNCT:
739 fprintf(f, "[:%spunct:]", notch);
740 break;
741
742 default:
743 fprintf(f, "\\%c{%s}", (not? 'P':'p'),
744 get_ucpname(ptype, pvalue));
745 break;
746 }
747 }
748 break;
749
750 default:
751 ccode += 1 + print_char(f, ccode, utf);
752 if (ch == XCL_RANGE)
753 {
754 fprintf(f, "-");
755 ccode += 1 + print_char(f, ccode, utf);
756 }
757 break;
758 }
759 }
760 }
761
762 /* Indicate a non-UTF class which was created by negation */
763
764 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
765
766 /* Handle repeats after a class or a back reference */
767
768 CLASS_REF_REPEAT:
769 switch(*ccode)
770 {
771 case OP_CRSTAR:
772 case OP_CRMINSTAR:
773 case OP_CRPLUS:
774 case OP_CRMINPLUS:
775 case OP_CRQUERY:
776 case OP_CRMINQUERY:
777 case OP_CRPOSSTAR:
778 case OP_CRPOSPLUS:
779 case OP_CRPOSQUERY:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100780 fprintf(f, "%s", OP_names[*ccode]);
781 extra += OP_lengths[*ccode];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700782 break;
783
784 case OP_CRRANGE:
785 case OP_CRMINRANGE:
786 case OP_CRPOSRANGE:
787 min = GET2(ccode,1);
788 max = GET2(ccode,1 + IMM2_SIZE);
789 if (max == 0) fprintf(f, "{%u,}", min);
790 else fprintf(f, "{%u,%u}", min, max);
791 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
792 else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100793 extra += OP_lengths[*ccode];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700794 break;
795
796 /* Do nothing if it's not a repeat; this code stops picky compilers
797 warning about the lack of a default code path. */
798
799 default:
800 break;
801 }
802 }
803 break;
804
805 case OP_MARK:
Elliott Hughes653c2102019-01-09 15:41:36 -0800806 case OP_COMMIT_ARG:
Nick Kralevichf73ff172014-09-27 12:41:49 -0700807 case OP_PRUNE_ARG:
808 case OP_SKIP_ARG:
809 case OP_THEN_ARG:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100810 fprintf(f, " %s ", OP_names[*code]);
811 print_custring_bylen(f, code + 2, code[1]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700812 extra += code[1];
813 break;
814
815 case OP_THEN:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100816 fprintf(f, " %s", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700817 break;
818
819 case OP_CIRCM:
820 case OP_DOLLM:
821 flag = "/m";
822 /* Fall through */
823
824 /* Anything else is just an item with no data, but possibly a flag. */
825
826 default:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100827 fprintf(f, " %s %s", flag, OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700828 break;
829 }
830
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100831 code += OP_lengths[*code] + extra;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700832 fprintf(f, "\n");
833 }
834}
835
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100836/* End of pcre2_printint.c */