/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
         New API code Copyright (c) 2016 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is #included in pcre2test.c at each supported
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
that comprise the library. It can also optionally be included in
pcre2_compile.c for detailed debugging in error situations. */


Janis Danisevskis53e448c2016-03-31 13:35:25 +010050/* Tables of operator names. The same 8-bit table is used for all code unit
51widths, so it must be defined only once. The list itself is defined in
52pcre2_internal.h, which is #included by pcre2test before this file. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070053
Janis Danisevskis53e448c2016-03-31 13:35:25 +010054#ifndef OP_LISTS_DEFINED
55static const char *OP_names[] = { OP_NAME_LIST };
56#define OP_LISTS_DEFINED
Nick Kralevichf73ff172014-09-27 12:41:49 -070057#endif
58
Janis Danisevskis53e448c2016-03-31 13:35:25 +010059/* The functions and tables herein must all have mode-dependent names. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070060
Janis Danisevskis53e448c2016-03-31 13:35:25 +010061#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
62#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
63#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
64#define print_char PCRE2_SUFFIX(print_char_)
65#define print_custring PCRE2_SUFFIX(print_custring_)
66#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
67#define print_prop PCRE2_SUFFIX(print_prop_)
Nick Kralevichf73ff172014-09-27 12:41:49 -070068
Janis Danisevskis53e448c2016-03-31 13:35:25 +010069/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
70the definition is next to the definition of the opcodes in pcre2_internal.h.
71The contents of the table are, however, mode-dependent. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070072
Janis Danisevskis53e448c2016-03-31 13:35:25 +010073static const uint8_t OP_lengths[] = { OP_LENGTHS };
Nick Kralevichf73ff172014-09-27 12:41:49 -070074
75
76
77/*************************************************
Janis Danisevskis53e448c2016-03-31 13:35:25 +010078* Print one character from a string *
Nick Kralevichf73ff172014-09-27 12:41:49 -070079*************************************************/
80
Janis Danisevskis53e448c2016-03-31 13:35:25 +010081/* In UTF mode the character may occupy more than one code unit.
82
83Arguments:
84 f file to write to
85 ptr pointer to first code unit of the character
86 utf TRUE if string is UTF (will be FALSE if UTF is not supported)
87
88Returns: number of additional code units used
89*/
90
Nick Kralevichf73ff172014-09-27 12:41:49 -070091static unsigned int
Janis Danisevskis53e448c2016-03-31 13:35:25 +010092print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
Nick Kralevichf73ff172014-09-27 12:41:49 -070093{
Janis Danisevskis53e448c2016-03-31 13:35:25 +010094uint32_t c = *ptr;
95BOOL one_code_unit = !utf;
Nick Kralevichf73ff172014-09-27 12:41:49 -070096
Janis Danisevskis53e448c2016-03-31 13:35:25 +010097/* If UTF is supported and requested, check for a valid single code unit. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070098
Janis Danisevskis53e448c2016-03-31 13:35:25 +010099#ifdef SUPPORT_UNICODE
100if (utf)
101 {
102#if PCRE2_CODE_UNIT_WIDTH == 8
103 one_code_unit = c < 0x80;
104#elif PCRE2_CODE_UNIT_WIDTH == 16
105 one_code_unit = (c & 0xfc00) != 0xd800;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700106#else
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100107 one_code_unit = (c & 0xfffff800u) != 0xd800u;
108#endif /* CODE_UNIT_WIDTH */
109 }
110#endif /* SUPPORT_UNICODE */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700111
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100112/* Handle a valid one-code-unit character at any width. */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700113
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100114if (one_code_unit)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700115 {
116 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
117 else if (c < 0x80) fprintf(f, "\\x%02x", c);
118 else fprintf(f, "\\x{%02x}", c);
119 return 0;
120 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100121
122/* Code for invalid UTF code units and multi-unit UTF characters is different
123for each width. If UTF is not supported, control should never get here, but we
124need a return statement to keep the compiler happy. */
125
126#ifndef SUPPORT_UNICODE
127return 0;
128#else
129
130/* Malformed UTF-8 should occur only if the sanity check has been turned off.
131Rather than swallow random bytes, just stop if we hit a bad one. Print it with
132\X instead of \x as an indication. */
133
134#if PCRE2_CODE_UNIT_WIDTH == 8
135if ((c & 0xc0) != 0xc0)
136 {
137 fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
138 return 0;
139 }
Nick Kralevichf73ff172014-09-27 12:41:49 -0700140else
141 {
142 int i;
143 int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
144 int s = 6*a;
145 c = (c & PRIV(utf8_table3)[a]) << s;
146 for (i = 1; i <= a; i++)
147 {
Nick Kralevichf73ff172014-09-27 12:41:49 -0700148 if ((ptr[i] & 0xc0) != 0x80)
149 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100150 fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700151 return i - 1;
152 }
Nick Kralevichf73ff172014-09-27 12:41:49 -0700153 s -= 6;
154 c |= (ptr[i] & 0x3f) << s;
155 }
156 fprintf(f, "\\x{%x}", c);
157 return a;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100158}
159#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700160
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100161/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
162Print it with \X instead of \x as an indication. */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700163
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100164#if PCRE2_CODE_UNIT_WIDTH == 16
165if ((ptr[1] & 0xfc00) != 0xdc00)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700166 {
Nick Kralevichf73ff172014-09-27 12:41:49 -0700167 fprintf(f, "\\X{%x}", c);
168 return 0;
169 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100170c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
171fprintf(f, "\\x{%x}", c);
172return 1;
173#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700174
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100175/* For UTF-32 we get here only for a malformed code unit, which should only
176occur if the sanity check has been turned off. Print it with \X instead of \x
177as an indication. */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700178
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100179#if PCRE2_CODE_UNIT_WIDTH == 32
180fprintf(f, "\\X{%x}", c);
181return 0;
182#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
183#endif /* SUPPORT_UNICODE */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700184}
185
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100186
187
Nick Kralevichf73ff172014-09-27 12:41:49 -0700188/*************************************************
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100189* Print string as a list of code units *
Nick Kralevichf73ff172014-09-27 12:41:49 -0700190*************************************************/
191
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100192/* These take no account of UTF as they always print each individual code unit.
193The string is zero-terminated for print_custring(); the length is given for
194print_custring_bylen().
195
196Arguments:
197 f file to write to
198 ptr point to the string
199 len length for print_custring_bylen()
200
201Returns: nothing
202*/
203
Nick Kralevichf73ff172014-09-27 12:41:49 -0700204static void
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100205print_custring(FILE *f, PCRE2_SPTR ptr)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700206{
207while (*ptr != '\0')
208 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100209 register uint32_t c = *ptr++;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700210 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
211 }
212}
213
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100214static void
215print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
216{
Janis Danisevskis8b979b22016-08-15 16:09:16 +0100217for (; len > 0; len--)
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100218 {
219 register uint32_t c = *ptr++;
220 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
221 }
222}
223
224
225
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Looks up the name of the property identified by a (type, value) pair. The
table of names is searched from its last entry backwards. If there is no
matching entry the string "??" is returned.

When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
into the main code, however, there is just one in this function, and "??" is
always returned in that case.

Arguments:
  ptype      the property type
  pvalue     the property value

Returns:     pointer to the property name, or to "??"
*/

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
int i;
for (i = PRIV(utt_size) - 1; i >= 0; i--)
  {
  if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value)
    {
    return PRIV(utt_names) + PRIV(utt)[i].name_offset;
    }
  }
#else   /* No UTF support */
(void)ptype;
(void)pvalue;
#endif  /* SUPPORT_UNICODE */

return "??";
}



Nick Kralevichf73ff172014-09-27 12:41:49 -0700254/*************************************************
255* Print Unicode property value *
256*************************************************/
257
258/* "Normal" properties can be printed from tables. The PT_CLIST property is a
259pseudo-property that contains a pointer to a list of case-equivalent
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100260characters.
261
262Arguments:
263 f file to write to
264 code pointer in the compiled code
265 before text to print before
266 after text to print after
267
268Returns: nothing
269*/
Nick Kralevichf73ff172014-09-27 12:41:49 -0700270
271static void
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100272print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700273{
274if (code[1] != PT_CLIST)
275 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100276 fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
Nick Kralevichf73ff172014-09-27 12:41:49 -0700277 code[2]), after);
278 }
279else
280 {
281 const char *not = (*code == OP_PROP)? "" : "not ";
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100282 const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700283 fprintf (f, "%s%sclist", before, not);
284 while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
285 fprintf(f, "%s", after);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700286 }
287}
288
289
290
Nick Kralevichf73ff172014-09-27 12:41:49 -0700291/*************************************************
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100292* Print compiled pattern *
Nick Kralevichf73ff172014-09-27 12:41:49 -0700293*************************************************/
294
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100295/* The print_lengths flag controls whether offsets and lengths of items are
296printed. Lenths can be turned off from pcre2test so that automatic tests on
297bytecode can be written that do not depend on the value of LINK_SIZE.
Nick Kralevichf73ff172014-09-27 12:41:49 -0700298
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100299Arguments:
300 re a compiled pattern
301 f the file to write to
302 print_lengths show various lengths
303
304Returns: nothing
305*/
306
307static void
308pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
Nick Kralevichf73ff172014-09-27 12:41:49 -0700309{
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100310PCRE2_SPTR codestart, nametable, code;
311uint32_t nesize = re->name_entry_size;
312BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700313
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100314nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
315code = codestart = nametable + re->name_count * re->name_entry_size;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700316
317for(;;)
318 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100319 PCRE2_SPTR ccode;
320 uint32_t c;
321 int i;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700322 const char *flag = " ";
Nick Kralevichf73ff172014-09-27 12:41:49 -0700323 unsigned int extra = 0;
324
325 if (print_lengths)
326 fprintf(f, "%3d ", (int)(code - codestart));
327 else
328 fprintf(f, " ");
329
330 switch(*code)
331 {
332/* ========================================================================== */
333 /* These cases are never obeyed. This is a fudge that causes a compile-
334 time error if the vectors OP_names or OP_lengths, which are indexed
335 by opcode, are not the correct length. It seems to be the only way to do
336 such a check at compile time, as the sizeof() operator does not work in
337 the C preprocessor. */
338
339 case OP_TABLE_LENGTH:
340 case OP_TABLE_LENGTH +
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100341 ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
342 (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
Nick Kralevichf73ff172014-09-27 12:41:49 -0700343 break;
344/* ========================================================================== */
345
346 case OP_END:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100347 fprintf(f, " %s\n", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700348 fprintf(f, "------------------------------------------------------------------\n");
349 return;
350
351 case OP_CHAR:
352 fprintf(f, " ");
353 do
354 {
355 code++;
356 code += 1 + print_char(f, code, utf);
357 }
358 while (*code == OP_CHAR);
359 fprintf(f, "\n");
360 continue;
361
362 case OP_CHARI:
363 fprintf(f, " /i ");
364 do
365 {
366 code++;
367 code += 1 + print_char(f, code, utf);
368 }
369 while (*code == OP_CHARI);
370 fprintf(f, "\n");
371 continue;
372
373 case OP_CBRA:
374 case OP_CBRAPOS:
375 case OP_SCBRA:
376 case OP_SCBRAPOS:
377 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
378 else fprintf(f, " ");
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100379 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
Nick Kralevichf73ff172014-09-27 12:41:49 -0700380 break;
381
382 case OP_BRA:
383 case OP_BRAPOS:
384 case OP_SBRA:
385 case OP_SBRAPOS:
386 case OP_KETRMAX:
387 case OP_KETRMIN:
388 case OP_KETRPOS:
389 case OP_ALT:
390 case OP_KET:
391 case OP_ASSERT:
392 case OP_ASSERT_NOT:
393 case OP_ASSERTBACK:
394 case OP_ASSERTBACK_NOT:
395 case OP_ONCE:
396 case OP_ONCE_NC:
397 case OP_COND:
398 case OP_SCOND:
399 case OP_REVERSE:
400 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
401 else fprintf(f, " ");
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100402 fprintf(f, "%s", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700403 break;
404
405 case OP_CLOSE:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100406 fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
Nick Kralevichf73ff172014-09-27 12:41:49 -0700407 break;
408
409 case OP_CREF:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100410 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700411 break;
412
413 case OP_DNCREF:
414 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100415 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700416 fprintf(f, " %s Cond ref <", flag);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100417 print_custring(f, entry);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700418 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
419 }
420 break;
421
422 case OP_RREF:
423 c = GET2(code, 1);
424 if (c == RREF_ANY)
425 fprintf(f, " Cond recurse any");
426 else
427 fprintf(f, " Cond recurse %d", c);
428 break;
429
430 case OP_DNRREF:
431 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100432 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700433 fprintf(f, " %s Cond recurse <", flag);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100434 print_custring(f, entry);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700435 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
436 }
437 break;
438
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100439 case OP_FALSE:
440 fprintf(f, " Cond false");
441 break;
442
443 case OP_TRUE:
444 fprintf(f, " Cond true");
Nick Kralevichf73ff172014-09-27 12:41:49 -0700445 break;
446
447 case OP_STARI:
448 case OP_MINSTARI:
449 case OP_POSSTARI:
450 case OP_PLUSI:
451 case OP_MINPLUSI:
452 case OP_POSPLUSI:
453 case OP_QUERYI:
454 case OP_MINQUERYI:
455 case OP_POSQUERYI:
456 flag = "/i";
457 /* Fall through */
458 case OP_STAR:
459 case OP_MINSTAR:
460 case OP_POSSTAR:
461 case OP_PLUS:
462 case OP_MINPLUS:
463 case OP_POSPLUS:
464 case OP_QUERY:
465 case OP_MINQUERY:
466 case OP_POSQUERY:
467 case OP_TYPESTAR:
468 case OP_TYPEMINSTAR:
469 case OP_TYPEPOSSTAR:
470 case OP_TYPEPLUS:
471 case OP_TYPEMINPLUS:
472 case OP_TYPEPOSPLUS:
473 case OP_TYPEQUERY:
474 case OP_TYPEMINQUERY:
475 case OP_TYPEPOSQUERY:
476 fprintf(f, " %s ", flag);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100477
Nick Kralevichf73ff172014-09-27 12:41:49 -0700478 if (*code >= OP_TYPESTAR)
479 {
480 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
481 {
482 print_prop(f, code + 1, "", " ");
483 extra = 2;
484 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100485 else fprintf(f, "%s", OP_names[code[1]]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700486 }
487 else extra = print_char(f, code+1, utf);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100488 fprintf(f, "%s", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700489 break;
490
491 case OP_EXACTI:
492 case OP_UPTOI:
493 case OP_MINUPTOI:
494 case OP_POSUPTOI:
495 flag = "/i";
496 /* Fall through */
497 case OP_EXACT:
498 case OP_UPTO:
499 case OP_MINUPTO:
500 case OP_POSUPTO:
501 fprintf(f, " %s ", flag);
502 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
503 fprintf(f, "{");
504 if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
505 fprintf(f, "%d}", GET2(code,1));
506 if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
507 else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
508 break;
509
510 case OP_TYPEEXACT:
511 case OP_TYPEUPTO:
512 case OP_TYPEMINUPTO:
513 case OP_TYPEPOSUPTO:
514 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
515 {
516 print_prop(f, code + IMM2_SIZE + 1, " ", " ");
517 extra = 2;
518 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100519 else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700520 fprintf(f, "{");
521 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
522 fprintf(f, "%d}", GET2(code,1));
523 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
524 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
525 break;
526
527 case OP_NOTI:
528 flag = "/i";
529 /* Fall through */
530 case OP_NOT:
531 fprintf(f, " %s [^", flag);
532 extra = print_char(f, code + 1, utf);
533 fprintf(f, "]");
534 break;
535
536 case OP_NOTSTARI:
537 case OP_NOTMINSTARI:
538 case OP_NOTPOSSTARI:
539 case OP_NOTPLUSI:
540 case OP_NOTMINPLUSI:
541 case OP_NOTPOSPLUSI:
542 case OP_NOTQUERYI:
543 case OP_NOTMINQUERYI:
544 case OP_NOTPOSQUERYI:
545 flag = "/i";
546 /* Fall through */
547
548 case OP_NOTSTAR:
549 case OP_NOTMINSTAR:
550 case OP_NOTPOSSTAR:
551 case OP_NOTPLUS:
552 case OP_NOTMINPLUS:
553 case OP_NOTPOSPLUS:
554 case OP_NOTQUERY:
555 case OP_NOTMINQUERY:
556 case OP_NOTPOSQUERY:
557 fprintf(f, " %s [^", flag);
558 extra = print_char(f, code + 1, utf);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100559 fprintf(f, "]%s", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700560 break;
561
562 case OP_NOTEXACTI:
563 case OP_NOTUPTOI:
564 case OP_NOTMINUPTOI:
565 case OP_NOTPOSUPTOI:
566 flag = "/i";
567 /* Fall through */
568
569 case OP_NOTEXACT:
570 case OP_NOTUPTO:
571 case OP_NOTMINUPTO:
572 case OP_NOTPOSUPTO:
573 fprintf(f, " %s [^", flag);
574 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
575 fprintf(f, "]{");
576 if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
577 fprintf(f, "%d}", GET2(code,1));
578 if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
579 else
580 if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
581 break;
582
583 case OP_RECURSE:
584 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
585 else fprintf(f, " ");
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100586 fprintf(f, "%s", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700587 break;
588
589 case OP_REFI:
590 flag = "/i";
591 /* Fall through */
592 case OP_REF:
593 fprintf(f, " %s \\%d", flag, GET2(code,1));
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100594 ccode = code + OP_lengths[*code];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700595 goto CLASS_REF_REPEAT;
596
597 case OP_DNREFI:
598 flag = "/i";
599 /* Fall through */
600 case OP_DNREF:
601 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100602 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700603 fprintf(f, " %s \\k<", flag);
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100604 print_custring(f, entry);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700605 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
606 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100607 ccode = code + OP_lengths[*code];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700608 goto CLASS_REF_REPEAT;
609
610 case OP_CALLOUT:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100611 fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
612 GET(code, 1), GET(code, 1 + LINK_SIZE));
613 break;
614
615 case OP_CALLOUT_STR:
616 c = code[1 + 4*LINK_SIZE];
617 fprintf(f, " %s %c", OP_names[*code], c);
618 extra = GET(code, 1 + 2*LINK_SIZE);
619 print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
620 for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
621 if (c == PRIV(callout_start_delims)[i])
622 {
623 c = PRIV(callout_end_delims)[i];
624 break;
625 }
626 fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
627 GET(code, 1 + LINK_SIZE));
Nick Kralevichf73ff172014-09-27 12:41:49 -0700628 break;
629
630 case OP_PROP:
631 case OP_NOTPROP:
632 print_prop(f, code, " ", "");
633 break;
634
635 /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
636 in having this code always here, and it makes it less messy without all
637 those #ifdefs. */
638
639 case OP_CLASS:
640 case OP_NCLASS:
641 case OP_XCLASS:
642 {
Nick Kralevichf73ff172014-09-27 12:41:49 -0700643 unsigned int min, max;
644 BOOL printmap;
645 BOOL invertmap = FALSE;
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100646 uint8_t *map;
647 uint8_t inverted_map[32];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700648
649 fprintf(f, " [");
650
651 if (*code == OP_XCLASS)
652 {
653 extra = GET(code, 1);
654 ccode = code + LINK_SIZE + 1;
655 printmap = (*ccode & XCL_MAP) != 0;
656 if ((*ccode & XCL_NOT) != 0)
657 {
658 invertmap = (*ccode & XCL_HASPROP) == 0;
659 fprintf(f, "^");
660 }
661 ccode++;
662 }
663 else
664 {
665 printmap = TRUE;
666 ccode = code + 1;
667 }
668
669 /* Print a bit map */
670
671 if (printmap)
672 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100673 map = (uint8_t *)ccode;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700674 if (invertmap)
675 {
676 for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
677 map = inverted_map;
678 }
679
680 for (i = 0; i < 256; i++)
681 {
682 if ((map[i/8] & (1 << (i&7))) != 0)
683 {
684 int j;
685 for (j = i+1; j < 256; j++)
686 if ((map[j/8] & (1 << (j&7))) == 0) break;
687 if (i == '-' || i == ']') fprintf(f, "\\");
688 if (PRINTABLE(i)) fprintf(f, "%c", i);
689 else fprintf(f, "\\x%02x", i);
690 if (--j > i)
691 {
692 if (j != i + 1) fprintf(f, "-");
693 if (j == '-' || j == ']') fprintf(f, "\\");
694 if (PRINTABLE(j)) fprintf(f, "%c", j);
695 else fprintf(f, "\\x%02x", j);
696 }
697 i = j;
698 }
699 }
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100700 ccode += 32 / sizeof(PCRE2_UCHAR);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700701 }
702
703 /* For an XCLASS there is always some additional data */
704
705 if (*code == OP_XCLASS)
706 {
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100707 PCRE2_UCHAR ch;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700708 while ((ch = *ccode++) != XCL_END)
709 {
710 BOOL not = FALSE;
711 const char *notch = "";
712
713 switch(ch)
714 {
715 case XCL_NOTPROP:
716 not = TRUE;
717 notch = "^";
718 /* Fall through */
719
720 case XCL_PROP:
721 {
722 unsigned int ptype = *ccode++;
723 unsigned int pvalue = *ccode++;
724
725 switch(ptype)
726 {
727 case PT_PXGRAPH:
728 fprintf(f, "[:%sgraph:]", notch);
729 break;
730
731 case PT_PXPRINT:
732 fprintf(f, "[:%sprint:]", notch);
733 break;
734
735 case PT_PXPUNCT:
736 fprintf(f, "[:%spunct:]", notch);
737 break;
738
739 default:
740 fprintf(f, "\\%c{%s}", (not? 'P':'p'),
741 get_ucpname(ptype, pvalue));
742 break;
743 }
744 }
745 break;
746
747 default:
748 ccode += 1 + print_char(f, ccode, utf);
749 if (ch == XCL_RANGE)
750 {
751 fprintf(f, "-");
752 ccode += 1 + print_char(f, ccode, utf);
753 }
754 break;
755 }
756 }
757 }
758
759 /* Indicate a non-UTF class which was created by negation */
760
761 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
762
763 /* Handle repeats after a class or a back reference */
764
765 CLASS_REF_REPEAT:
766 switch(*ccode)
767 {
768 case OP_CRSTAR:
769 case OP_CRMINSTAR:
770 case OP_CRPLUS:
771 case OP_CRMINPLUS:
772 case OP_CRQUERY:
773 case OP_CRMINQUERY:
774 case OP_CRPOSSTAR:
775 case OP_CRPOSPLUS:
776 case OP_CRPOSQUERY:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100777 fprintf(f, "%s", OP_names[*ccode]);
778 extra += OP_lengths[*ccode];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700779 break;
780
781 case OP_CRRANGE:
782 case OP_CRMINRANGE:
783 case OP_CRPOSRANGE:
784 min = GET2(ccode,1);
785 max = GET2(ccode,1 + IMM2_SIZE);
786 if (max == 0) fprintf(f, "{%u,}", min);
787 else fprintf(f, "{%u,%u}", min, max);
788 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
789 else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100790 extra += OP_lengths[*ccode];
Nick Kralevichf73ff172014-09-27 12:41:49 -0700791 break;
792
793 /* Do nothing if it's not a repeat; this code stops picky compilers
794 warning about the lack of a default code path. */
795
796 default:
797 break;
798 }
799 }
800 break;
801
802 case OP_MARK:
803 case OP_PRUNE_ARG:
804 case OP_SKIP_ARG:
805 case OP_THEN_ARG:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100806 fprintf(f, " %s ", OP_names[*code]);
807 print_custring_bylen(f, code + 2, code[1]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700808 extra += code[1];
809 break;
810
811 case OP_THEN:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100812 fprintf(f, " %s", OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700813 break;
814
815 case OP_CIRCM:
816 case OP_DOLLM:
817 flag = "/m";
818 /* Fall through */
819
820 /* Anything else is just an item with no data, but possibly a flag. */
821
822 default:
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100823 fprintf(f, " %s %s", flag, OP_names[*code]);
Nick Kralevichf73ff172014-09-27 12:41:49 -0700824 break;
825 }
826
Janis Danisevskis53e448c2016-03-31 13:35:25 +0100827 code += OP_lengths[*code] + extra;
Nick Kralevichf73ff172014-09-27 12:41:49 -0700828 fprintf(f, "\n");
829 }
830}
831
/* End of pcre2_printint.c */