Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1 | /************************************************* |
| 2 | * Perl-Compatible Regular Expressions * |
| 3 | *************************************************/ |
| 4 | |
| 5 | /* PCRE is a library of functions to support regular expressions whose syntax |
| 6 | and semantics are as close as possible to those of the Perl 5 language. |
| 7 | |
| 8 | Written by Philip Hazel |
| 9 | Original API code Copyright (c) 1997-2012 University of Cambridge |
Elliott Hughes | 4e19c8e | 2022-04-15 15:11:02 -0700 | [diff] [blame] | 10 | New API code Copyright (c) 2016-2022 University of Cambridge |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 11 | |
| 12 | ----------------------------------------------------------------------------- |
| 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions are met: |
| 15 | |
| 16 | * Redistributions of source code must retain the above copyright notice, |
| 17 | this list of conditions and the following disclaimer. |
| 18 | |
| 19 | * Redistributions in binary form must reproduce the above copyright |
| 20 | notice, this list of conditions and the following disclaimer in the |
| 21 | documentation and/or other materials provided with the distribution. |
| 22 | |
| 23 | * Neither the name of the University of Cambridge nor the names of its |
| 24 | contributors may be used to endorse or promote products derived from |
| 25 | this software without specific prior written permission. |
| 26 | |
| 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 37 | POSSIBILITY OF SUCH DAMAGE. |
| 38 | ----------------------------------------------------------------------------- |
| 39 | */ |
| 40 | |
| 41 | /* This module contains functions that scan a compiled pattern and change |
| 42 | repeats into possessive repeats where possible. */ |
| 43 | |
| 44 | |
| 45 | #ifdef HAVE_CONFIG_H |
| 46 | #include "config.h" |
| 47 | #endif |
| 48 | |
| 49 | |
| 50 | #include "pcre2_internal.h" |
| 51 | |
| 52 | |
| 53 | /************************************************* |
| 54 | * Tables for auto-possessification * |
| 55 | *************************************************/ |
| 56 | |
| 57 | /* This table is used to check whether auto-possessification is possible |
| 58 | between adjacent character-type opcodes. The left-hand (repeated) opcode is |
| 59 | used to select the row, and the right-hand opcode is use to select the column. |
| 60 | A value of 1 means that auto-possessification is OK. For example, the second |
| 61 | value in the first row means that \D+\d can be turned into \D++\d. |
| 62 | |
| 63 | The Unicode property types (\P and \p) have to be present to fill out the table |
| 64 | because of what their opcode values are, but the table values should always be |
| 65 | zero because property types are handled separately in the code. The last four |
| 66 | columns apply to items that cannot be repeated, so there is no need to have |
| 67 | rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is |
| 68 | *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ |
| 69 | |
| 70 | #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1) |
| 71 | #define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1) |
| 72 | |
| 73 | static const uint8_t autoposstab[APTROWS][APTCOLS] = { |
| 74 | /* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */ |
| 75 | { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */ |
| 76 | { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \d */ |
| 77 | { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \S */ |
| 78 | { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \s */ |
| 79 | { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \W */ |
| 80 | { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \w */ |
| 81 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* . */ |
| 82 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* .+ */ |
| 83 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \C */ |
| 84 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \P */ |
| 85 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \p */ |
| 86 | { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \R */ |
| 87 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \H */ |
| 88 | { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \h */ |
| 89 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \V */ |
| 90 | { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */ |
| 91 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */ |
| 92 | }; |
| 93 | |
Janis Danisevskis | 8b979b2 | 2016-08-15 16:09:16 +0100 | [diff] [blame] | 94 | #ifdef SUPPORT_UNICODE |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 95 | /* This table is used to check whether auto-possessification is possible |
| 96 | between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The |
| 97 | left-hand (repeated) opcode is used to select the row, and the right-hand |
| 98 | opcode is used to select the column. The values are as follows: |
| 99 | |
| 100 | 0 Always return FALSE (never auto-possessify) |
| 101 | 1 Character groups are distinct (possessify if both are OP_PROP) |
| 102 | 2 Check character categories in the same group (general or particular) |
| 103 | 3 TRUE if the two opcodes are not the same (PROP vs NOTPROP) |
| 104 | |
| 105 | 4 Check left general category vs right particular category |
| 106 | 5 Check right general category vs left particular category |
| 107 | |
| 108 | 6 Left alphanum vs right general category |
| 109 | 7 Left space vs right general category |
| 110 | 8 Left word vs right general category |
| 111 | |
| 112 | 9 Right alphanum vs left general category |
| 113 | 10 Right space vs left general category |
| 114 | 11 Right word vs left general category |
| 115 | |
| 116 | 12 Left alphanum vs right particular category |
| 117 | 13 Left space vs right particular category |
| 118 | 14 Left word vs right particular category |
| 119 | |
| 120 | 15 Right alphanum vs left particular category |
| 121 | 16 Right space vs left particular category |
| 122 | 17 Right word vs left particular category |
| 123 | */ |
| 124 | |
| 125 | static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = { |
Elliott Hughes | 4e19c8e | 2022-04-15 15:11:02 -0700 | [diff] [blame] | 126 | /* ANY LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */ |
| 127 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ |
| 128 | { 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */ |
| 129 | { 0, 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */ |
| 130 | { 0, 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */ |
| 131 | { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ |
| 132 | { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */ |
| 133 | { 0, 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */ |
| 134 | { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */ |
| 135 | { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */ |
| 136 | { 0, 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */ |
| 137 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ |
| 138 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */ |
| 139 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */ |
| 140 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */ |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 141 | }; |
| 142 | |
| 143 | /* This table is used to check whether auto-possessification is possible |
| 144 | between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one |
| 145 | specifies a general category and the other specifies a particular category. The |
| 146 | row is selected by the general category and the column by the particular |
| 147 | category. The value is 1 if the particular category is not part of the general |
| 148 | category. */ |
| 149 | |
| 150 | static const uint8_t catposstab[7][30] = { |
| 151 | /* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */ |
| 152 | { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* C */ |
| 153 | { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* L */ |
| 154 | { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* M */ |
| 155 | { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */ |
| 156 | { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }, /* P */ |
| 157 | { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 }, /* S */ |
| 158 | { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } /* Z */ |
| 159 | }; |
| 160 | |
| 161 | /* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against |
| 162 | a general or particular category. The properties in each row are those |
| 163 | that apply to the character set in question. Duplication means that a little |
| 164 | unnecessary work is done when checking, but this keeps things much simpler |
| 165 | because they can all use the same code. For more details see the comment where |
| 166 | this table is used. |
| 167 | |
| 168 | Note: SPACE and PXSPACE used to be different because Perl excluded VT from |
| 169 | "space", but from Perl 5.18 it's included, so both categories are treated the |
| 170 | same here. */ |
| 171 | |
| 172 | static const uint8_t posspropstab[3][4] = { |
| 173 | { ucp_L, ucp_N, ucp_N, ucp_Nl }, /* ALNUM, 3rd and 4th values redundant */ |
| 174 | { ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */ |
| 175 | { ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */ |
| 176 | }; |
Janis Danisevskis | 8b979b2 | 2016-08-15 16:09:16 +0100 | [diff] [blame] | 177 | #endif /* SUPPORT_UNICODE */ |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 178 | |
| 179 | |
| 180 | |
| 181 | #ifdef SUPPORT_UNICODE |
| 182 | /************************************************* |
| 183 | * Check a character and a property * |
| 184 | *************************************************/ |
| 185 | |
| 186 | /* This function is called by compare_opcodes() when a property item is |
| 187 | adjacent to a fixed character. |
| 188 | |
| 189 | Arguments: |
| 190 | c the character |
| 191 | ptype the property type |
| 192 | pdata the data for the type |
| 193 | negated TRUE if it's a negated property (\P or \p{^) |
| 194 | |
| 195 | Returns: TRUE if auto-possessifying is OK |
| 196 | */ |
| 197 | |
| 198 | static BOOL |
| 199 | check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata, |
| 200 | BOOL negated) |
| 201 | { |
Elliott Hughes | 4e19c8e | 2022-04-15 15:11:02 -0700 | [diff] [blame] | 202 | BOOL ok; |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 203 | const uint32_t *p; |
| 204 | const ucd_record *prop = GET_UCD(c); |
| 205 | |
| 206 | switch(ptype) |
| 207 | { |
| 208 | case PT_LAMP: |
| 209 | return (prop->chartype == ucp_Lu || |
| 210 | prop->chartype == ucp_Ll || |
| 211 | prop->chartype == ucp_Lt) == negated; |
| 212 | |
| 213 | case PT_GC: |
| 214 | return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated; |
| 215 | |
| 216 | case PT_PC: |
| 217 | return (pdata == prop->chartype) == negated; |
| 218 | |
| 219 | case PT_SC: |
| 220 | return (pdata == prop->script) == negated; |
| 221 | |
Elliott Hughes | 4e19c8e | 2022-04-15 15:11:02 -0700 | [diff] [blame] | 222 | case PT_SCX: |
| 223 | ok = (pdata == prop->script |
| 224 | || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0); |
| 225 | return ok == negated; |
| 226 | |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 227 | /* These are specials */ |
| 228 | |
| 229 | case PT_ALNUM: |
| 230 | return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| 231 | PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated; |
| 232 | |
| 233 | /* Perl space used to exclude VT, but from Perl 5.18 it is included, which |
| 234 | means that Perl space and POSIX space are now identical. PCRE was changed |
| 235 | at release 8.34. */ |
| 236 | |
| 237 | case PT_SPACE: /* Perl space */ |
| 238 | case PT_PXSPACE: /* POSIX space */ |
| 239 | switch(c) |
| 240 | { |
| 241 | HSPACE_CASES: |
| 242 | VSPACE_CASES: |
| 243 | return negated; |
| 244 | |
| 245 | default: |
| 246 | return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated; |
| 247 | } |
| 248 | break; /* Control never reaches here */ |
| 249 | |
| 250 | case PT_WORD: |
| 251 | return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || |
| 252 | PRIV(ucp_gentype)[prop->chartype] == ucp_N || |
| 253 | c == CHAR_UNDERSCORE) == negated; |
| 254 | |
| 255 | case PT_CLIST: |
| 256 | p = PRIV(ucd_caseless_sets) + prop->caseset; |
| 257 | for (;;) |
| 258 | { |
| 259 | if (c < *p) return !negated; |
| 260 | if (c == *p++) return negated; |
| 261 | } |
| 262 | break; /* Control never reaches here */ |
Elliott Hughes | 4e19c8e | 2022-04-15 15:11:02 -0700 | [diff] [blame] | 263 | |
| 264 | /* Haven't yet thought these through. */ |
| 265 | |
| 266 | case PT_BIDICL: |
| 267 | return FALSE; |
| 268 | |
| 269 | case PT_BOOL: |
| 270 | return FALSE; |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 271 | } |
| 272 | |
| 273 | return FALSE; |
| 274 | } |
| 275 | #endif /* SUPPORT_UNICODE */ |
| 276 | |
| 277 | |
| 278 | |
| 279 | /************************************************* |
| 280 | * Base opcode of repeated opcodes * |
| 281 | *************************************************/ |
| 282 | |
| 283 | /* Returns the base opcode for repeated single character type opcodes. If the |
| 284 | opcode is not a repeated character type, it returns with the original value. |
| 285 | |
| 286 | Arguments: c opcode |
| 287 | Returns: base opcode for the type |
| 288 | */ |
| 289 | |
| 290 | static PCRE2_UCHAR |
| 291 | get_repeat_base(PCRE2_UCHAR c) |
| 292 | { |
| 293 | return (c > OP_TYPEPOSUPTO)? c : |
| 294 | (c >= OP_TYPESTAR)? OP_TYPESTAR : |
| 295 | (c >= OP_NOTSTARI)? OP_NOTSTARI : |
| 296 | (c >= OP_NOTSTAR)? OP_NOTSTAR : |
| 297 | (c >= OP_STARI)? OP_STARI : |
| 298 | OP_STAR; |
| 299 | } |
| 300 | |
| 301 | |
| 302 | /************************************************* |
| 303 | * Fill the character property list * |
| 304 | *************************************************/ |
| 305 | |
| 306 | /* Checks whether the code points to an opcode that can take part in auto- |
| 307 | possessification, and if so, fills a list with its properties. |
| 308 | |
| 309 | Arguments: |
| 310 | code points to start of expression |
| 311 | utf TRUE if in UTF mode |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 312 | ucp TRUE if in UCP mode |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 313 | fcc points to the case-flipping table |
| 314 | list points to output list |
| 315 | list[0] will be filled with the opcode |
| 316 | list[1] will be non-zero if this opcode |
| 317 | can match an empty character string |
| 318 | list[2..7] depends on the opcode |
| 319 | |
| 320 | Returns: points to the start of the next opcode if *code is accepted |
| 321 | NULL if *code is not accepted |
| 322 | */ |
| 323 | |
| 324 | static PCRE2_SPTR |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 325 | get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc, |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 326 | uint32_t *list) |
| 327 | { |
| 328 | PCRE2_UCHAR c = *code; |
| 329 | PCRE2_UCHAR base; |
| 330 | PCRE2_SPTR end; |
| 331 | uint32_t chr; |
| 332 | |
| 333 | #ifdef SUPPORT_UNICODE |
| 334 | uint32_t *clist_dest; |
| 335 | const uint32_t *clist_src; |
| 336 | #else |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 337 | (void)utf; /* Suppress "unused parameter" compiler warnings */ |
| 338 | (void)ucp; |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 339 | #endif |
| 340 | |
| 341 | list[0] = c; |
| 342 | list[1] = FALSE; |
| 343 | code++; |
| 344 | |
| 345 | if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) |
| 346 | { |
| 347 | base = get_repeat_base(c); |
| 348 | c -= (base - OP_STAR); |
| 349 | |
| 350 | if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO) |
| 351 | code += IMM2_SIZE; |
| 352 | |
| 353 | list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT && |
| 354 | c != OP_POSPLUS); |
| 355 | |
| 356 | switch(base) |
| 357 | { |
| 358 | case OP_STAR: |
| 359 | list[0] = OP_CHAR; |
| 360 | break; |
| 361 | |
| 362 | case OP_STARI: |
| 363 | list[0] = OP_CHARI; |
| 364 | break; |
| 365 | |
| 366 | case OP_NOTSTAR: |
| 367 | list[0] = OP_NOT; |
| 368 | break; |
| 369 | |
| 370 | case OP_NOTSTARI: |
| 371 | list[0] = OP_NOTI; |
| 372 | break; |
| 373 | |
| 374 | case OP_TYPESTAR: |
| 375 | list[0] = *code; |
| 376 | code++; |
| 377 | break; |
| 378 | } |
| 379 | c = list[0]; |
| 380 | } |
| 381 | |
| 382 | switch(c) |
| 383 | { |
| 384 | case OP_NOT_DIGIT: |
| 385 | case OP_DIGIT: |
| 386 | case OP_NOT_WHITESPACE: |
| 387 | case OP_WHITESPACE: |
| 388 | case OP_NOT_WORDCHAR: |
| 389 | case OP_WORDCHAR: |
| 390 | case OP_ANY: |
| 391 | case OP_ALLANY: |
| 392 | case OP_ANYNL: |
| 393 | case OP_NOT_HSPACE: |
| 394 | case OP_HSPACE: |
| 395 | case OP_NOT_VSPACE: |
| 396 | case OP_VSPACE: |
| 397 | case OP_EXTUNI: |
| 398 | case OP_EODN: |
| 399 | case OP_EOD: |
| 400 | case OP_DOLL: |
| 401 | case OP_DOLLM: |
| 402 | return code; |
| 403 | |
| 404 | case OP_CHAR: |
| 405 | case OP_NOT: |
| 406 | GETCHARINCTEST(chr, code); |
| 407 | list[2] = chr; |
| 408 | list[3] = NOTACHAR; |
| 409 | return code; |
| 410 | |
| 411 | case OP_CHARI: |
| 412 | case OP_NOTI: |
| 413 | list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT; |
| 414 | GETCHARINCTEST(chr, code); |
| 415 | list[2] = chr; |
| 416 | |
| 417 | #ifdef SUPPORT_UNICODE |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 418 | if (chr < 128 || (chr < 256 && !utf && !ucp)) |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 419 | list[3] = fcc[chr]; |
| 420 | else |
| 421 | list[3] = UCD_OTHERCASE(chr); |
| 422 | #elif defined SUPPORT_WIDE_CHARS |
| 423 | list[3] = (chr < 256) ? fcc[chr] : chr; |
| 424 | #else |
| 425 | list[3] = fcc[chr]; |
| 426 | #endif |
| 427 | |
| 428 | /* The othercase might be the same value. */ |
| 429 | |
| 430 | if (chr == list[3]) |
| 431 | list[3] = NOTACHAR; |
| 432 | else |
| 433 | list[4] = NOTACHAR; |
| 434 | return code; |
| 435 | |
| 436 | #ifdef SUPPORT_UNICODE |
| 437 | case OP_PROP: |
| 438 | case OP_NOTPROP: |
| 439 | if (code[0] != PT_CLIST) |
| 440 | { |
| 441 | list[2] = code[0]; |
| 442 | list[3] = code[1]; |
| 443 | return code + 2; |
| 444 | } |
| 445 | |
| 446 | /* Convert only if we have enough space. */ |
| 447 | |
| 448 | clist_src = PRIV(ucd_caseless_sets) + code[1]; |
| 449 | clist_dest = list + 2; |
| 450 | code += 2; |
| 451 | |
| 452 | do { |
| 453 | if (clist_dest >= list + 8) |
| 454 | { |
| 455 | /* Early return if there is not enough space. This should never |
| 456 | happen, since all clists are shorter than 5 character now. */ |
| 457 | list[2] = code[0]; |
| 458 | list[3] = code[1]; |
| 459 | return code; |
| 460 | } |
| 461 | *clist_dest++ = *clist_src; |
| 462 | } |
| 463 | while(*clist_src++ != NOTACHAR); |
| 464 | |
| 465 | /* All characters are stored. The terminating NOTACHAR is copied from the |
| 466 | clist itself. */ |
| 467 | |
| 468 | list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT; |
| 469 | return code; |
| 470 | #endif |
| 471 | |
| 472 | case OP_NCLASS: |
| 473 | case OP_CLASS: |
| 474 | #ifdef SUPPORT_WIDE_CHARS |
| 475 | case OP_XCLASS: |
| 476 | if (c == OP_XCLASS) |
| 477 | end = code + GET(code, 0) - 1; |
| 478 | else |
| 479 | #endif |
| 480 | end = code + 32 / sizeof(PCRE2_UCHAR); |
| 481 | |
| 482 | switch(*end) |
| 483 | { |
| 484 | case OP_CRSTAR: |
| 485 | case OP_CRMINSTAR: |
| 486 | case OP_CRQUERY: |
| 487 | case OP_CRMINQUERY: |
| 488 | case OP_CRPOSSTAR: |
| 489 | case OP_CRPOSQUERY: |
| 490 | list[1] = TRUE; |
| 491 | end++; |
| 492 | break; |
| 493 | |
| 494 | case OP_CRPLUS: |
| 495 | case OP_CRMINPLUS: |
| 496 | case OP_CRPOSPLUS: |
| 497 | end++; |
| 498 | break; |
| 499 | |
| 500 | case OP_CRRANGE: |
| 501 | case OP_CRMINRANGE: |
| 502 | case OP_CRPOSRANGE: |
| 503 | list[1] = (GET2(end, 1) == 0); |
| 504 | end += 1 + 2 * IMM2_SIZE; |
| 505 | break; |
| 506 | } |
| 507 | list[2] = (uint32_t)(end - code); |
| 508 | return end; |
| 509 | } |
Elliott Hughes | 378b175 | 2021-06-08 13:42:40 -0700 | [diff] [blame] | 510 | |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 511 | return NULL; /* Opcode not accepted */ |
| 512 | } |
| 513 | |
| 514 | |
| 515 | |
| 516 | /************************************************* |
| 517 | * Scan further character sets for match * |
| 518 | *************************************************/ |
| 519 | |
| 520 | /* Checks whether the base and the current opcode have a common character, in |
| 521 | which case the base cannot be possessified. |
| 522 | |
| 523 | Arguments: |
| 524 | code points to the byte code |
| 525 | utf TRUE in UTF mode |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 526 | ucp TRUE in UCP mode |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 527 | cb compile data block |
| 528 | base_list the data list of the base opcode |
Elliott Hughes | 653c210 | 2019-01-09 15:41:36 -0800 | [diff] [blame] | 529 | base_end the end of the base opcode |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 530 | rec_limit points to recursion depth counter |
| 531 | |
| 532 | Returns: TRUE if the auto-possessification is possible |
| 533 | */ |
| 534 | |
| 535 | static BOOL |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 536 | compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb, |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 537 | const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit) |
| 538 | { |
| 539 | PCRE2_UCHAR c; |
| 540 | uint32_t list[8]; |
| 541 | const uint32_t *chr_ptr; |
| 542 | const uint32_t *ochr_ptr; |
| 543 | const uint32_t *list_ptr; |
| 544 | PCRE2_SPTR next_code; |
| 545 | #ifdef SUPPORT_WIDE_CHARS |
| 546 | PCRE2_SPTR xclass_flags; |
| 547 | #endif |
| 548 | const uint8_t *class_bitset; |
| 549 | const uint8_t *set1, *set2, *set_end; |
| 550 | uint32_t chr; |
| 551 | BOOL accepted, invert_bits; |
| 552 | BOOL entered_a_group = FALSE; |
| 553 | |
| 554 | if (--(*rec_limit) <= 0) return FALSE; /* Recursion has gone too deep */ |
| 555 | |
| 556 | /* Note: the base_list[1] contains whether the current opcode has a greedy |
| 557 | (represented by a non-zero value) quantifier. This is a different from |
| 558 | other character type lists, which store here that the character iterator |
| 559 | matches to an empty string (also represented by a non-zero value). */ |
| 560 | |
| 561 | for(;;) |
| 562 | { |
| 563 | /* All operations move the code pointer forward. |
| 564 | Therefore infinite recursions are not possible. */ |
| 565 | |
| 566 | c = *code; |
| 567 | |
| 568 | /* Skip over callouts */ |
| 569 | |
| 570 | if (c == OP_CALLOUT) |
| 571 | { |
| 572 | code += PRIV(OP_lengths)[c]; |
| 573 | continue; |
| 574 | } |
| 575 | |
| 576 | if (c == OP_CALLOUT_STR) |
| 577 | { |
| 578 | code += GET(code, 1 + 2*LINK_SIZE); |
| 579 | continue; |
| 580 | } |
| 581 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 582 | /* At the end of a branch, skip to the end of the group. */ |
| 583 | |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 584 | if (c == OP_ALT) |
| 585 | { |
| 586 | do code += GET(code, 1); while (*code == OP_ALT); |
| 587 | c = *code; |
| 588 | } |
| 589 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 590 | /* Inspect the next opcode. */ |
| 591 | |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 592 | switch(c) |
| 593 | { |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 594 | /* We can always possessify a greedy iterator at the end of the pattern, |
| 595 | which is reached after skipping over the final OP_KET. A non-greedy |
| 596 | iterator must never be possessified. */ |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 597 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 598 | case OP_END: |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 599 | return base_list[1] != 0; |
| 600 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 601 | /* When an iterator is at the end of certain kinds of group we can inspect |
| 602 | what follows the group by skipping over the closing ket. Note that this |
| 603 | does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given |
| 604 | iteration is variable (could be another iteration or could be the next |
| 605 | item). As these two opcodes are not listed in the next switch, they will |
| 606 | end up as the next code to inspect, and return FALSE by virtue of being |
| 607 | unsupported. */ |
| 608 | |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 609 | case OP_KET: |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 610 | case OP_KETRPOS: |
| 611 | /* The non-greedy case cannot be converted to a possessive form. */ |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 612 | |
| 613 | if (base_list[1] == 0) return FALSE; |
| 614 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 615 | /* If the bracket is capturing it might be referenced by an OP_RECURSE |
| 616 | so its last iterator can never be possessified if the pattern contains |
| 617 | recursions. (This could be improved by keeping a list of group numbers that |
| 618 | are called by recursion.) */ |
| 619 | |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 620 | switch(*(code - GET(code, 1))) |
| 621 | { |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 622 | case OP_CBRA: |
| 623 | case OP_SCBRA: |
| 624 | case OP_CBRAPOS: |
| 625 | case OP_SCBRAPOS: |
| 626 | if (cb->had_recurse) return FALSE; |
| 627 | break; |
| 628 | |
Elliott Hughes | 0c26e19 | 2019-08-07 12:24:46 -0700 | [diff] [blame] | 629 | /* A script run might have to backtrack if the iterated item can match |
| 630 | characters from more than one script. So give up unless repeating an |
| 631 | explicit character. */ |
| 632 | |
| 633 | case OP_SCRIPT_RUN: |
| 634 | if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI) |
| 635 | return FALSE; |
| 636 | break; |
| 637 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 638 | /* Atomic sub-patterns and assertions can always auto-possessify their |
| 639 | last iterator. However, if the group was entered as a result of checking |
| 640 | a previous iterator, this is not possible. */ |
| 641 | |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 642 | case OP_ASSERT: |
| 643 | case OP_ASSERT_NOT: |
| 644 | case OP_ASSERTBACK: |
| 645 | case OP_ASSERTBACK_NOT: |
| 646 | case OP_ONCE: |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 647 | return !entered_a_group; |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 648 | |
| 649 | /* Non-atomic assertions - don't possessify last iterator. This needs |
| 650 | more thought. */ |
| 651 | |
| 652 | case OP_ASSERT_NA: |
| 653 | case OP_ASSERTBACK_NA: |
| 654 | return FALSE; |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 655 | } |
| 656 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 657 | /* Skip over the bracket and inspect what comes next. */ |
| 658 | |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 659 | code += PRIV(OP_lengths)[c]; |
| 660 | continue; |
| 661 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 662 | /* Handle cases where the next item is a group. */ |
| 663 | |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 664 | case OP_ONCE: |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 665 | case OP_BRA: |
| 666 | case OP_CBRA: |
| 667 | next_code = code + GET(code, 1); |
| 668 | code += PRIV(OP_lengths)[c]; |
| 669 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 670 | /* Check each branch. We have to recurse a level for all but the last |
| 671 | branch. */ |
| 672 | |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 673 | while (*next_code == OP_ALT) |
| 674 | { |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 675 | if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit)) |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 676 | return FALSE; |
| 677 | code = next_code + 1 + LINK_SIZE; |
| 678 | next_code += GET(next_code, 1); |
| 679 | } |
| 680 | |
| 681 | entered_a_group = TRUE; |
| 682 | continue; |
| 683 | |
| 684 | case OP_BRAZERO: |
| 685 | case OP_BRAMINZERO: |
| 686 | |
| 687 | next_code = code + 1; |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 688 | if (*next_code != OP_BRA && *next_code != OP_CBRA && |
| 689 | *next_code != OP_ONCE) return FALSE; |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 690 | |
| 691 | do next_code += GET(next_code, 1); while (*next_code == OP_ALT); |
| 692 | |
| 693 | /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */ |
| 694 | |
| 695 | next_code += 1 + LINK_SIZE; |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 696 | if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end, |
| 697 | rec_limit)) |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 698 | return FALSE; |
| 699 | |
| 700 | code += PRIV(OP_lengths)[c]; |
| 701 | continue; |
| 702 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 703 | /* The next opcode does not need special handling; fall through and use it |
| 704 | to see if the base can be possessified. */ |
| 705 | |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 706 | default: |
| 707 | break; |
| 708 | } |
| 709 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 710 | /* We now have the next appropriate opcode to compare with the base. Check |
| 711 | for a supported opcode, and load its properties. */ |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 712 | |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 713 | code = get_chr_property_list(code, utf, ucp, cb->fcc, list); |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 714 | if (code == NULL) return FALSE; /* Unsupported */ |
| 715 | |
| 716 | /* If either opcode is a small character list, set pointers for comparing |
| 717 | characters from that list with another list, or with a property. */ |
| 718 | |
| 719 | if (base_list[0] == OP_CHAR) |
| 720 | { |
| 721 | chr_ptr = base_list + 2; |
| 722 | list_ptr = list; |
| 723 | } |
| 724 | else if (list[0] == OP_CHAR) |
| 725 | { |
| 726 | chr_ptr = list + 2; |
| 727 | list_ptr = base_list; |
| 728 | } |
| 729 | |
| 730 | /* Character bitsets can also be compared to certain opcodes. */ |
| 731 | |
| 732 | else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS |
| 733 | #if PCRE2_CODE_UNIT_WIDTH == 8 |
| 734 | /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */ |
| 735 | || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS)) |
| 736 | #endif |
| 737 | ) |
| 738 | { |
| 739 | #if PCRE2_CODE_UNIT_WIDTH == 8 |
| 740 | if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS)) |
| 741 | #else |
| 742 | if (base_list[0] == OP_CLASS) |
| 743 | #endif |
| 744 | { |
| 745 | set1 = (uint8_t *)(base_end - base_list[2]); |
| 746 | list_ptr = list; |
| 747 | } |
| 748 | else |
| 749 | { |
| 750 | set1 = (uint8_t *)(code - list[2]); |
| 751 | list_ptr = base_list; |
| 752 | } |
| 753 | |
| 754 | invert_bits = FALSE; |
| 755 | switch(list_ptr[0]) |
| 756 | { |
| 757 | case OP_CLASS: |
| 758 | case OP_NCLASS: |
| 759 | set2 = (uint8_t *) |
| 760 | ((list_ptr == list ? code : base_end) - list_ptr[2]); |
| 761 | break; |
| 762 | |
| 763 | #ifdef SUPPORT_WIDE_CHARS |
| 764 | case OP_XCLASS: |
| 765 | xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE; |
| 766 | if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE; |
| 767 | if ((*xclass_flags & XCL_MAP) == 0) |
| 768 | { |
| 769 | /* No bits are set for characters < 256. */ |
Elliott Hughes | 653c210 | 2019-01-09 15:41:36 -0800 | [diff] [blame] | 770 | if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0; |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 771 | /* Might be an empty repeat. */ |
| 772 | continue; |
| 773 | } |
| 774 | set2 = (uint8_t *)(xclass_flags + 1); |
| 775 | break; |
| 776 | #endif |
| 777 | |
| 778 | case OP_NOT_DIGIT: |
| 779 | invert_bits = TRUE; |
| 780 | /* Fall through */ |
| 781 | case OP_DIGIT: |
| 782 | set2 = (uint8_t *)(cb->cbits + cbit_digit); |
| 783 | break; |
| 784 | |
| 785 | case OP_NOT_WHITESPACE: |
| 786 | invert_bits = TRUE; |
| 787 | /* Fall through */ |
| 788 | case OP_WHITESPACE: |
| 789 | set2 = (uint8_t *)(cb->cbits + cbit_space); |
| 790 | break; |
| 791 | |
| 792 | case OP_NOT_WORDCHAR: |
| 793 | invert_bits = TRUE; |
| 794 | /* Fall through */ |
| 795 | case OP_WORDCHAR: |
| 796 | set2 = (uint8_t *)(cb->cbits + cbit_word); |
| 797 | break; |
| 798 | |
| 799 | default: |
| 800 | return FALSE; |
| 801 | } |
| 802 | |
| 803 | /* Because the bit sets are unaligned bytes, we need to perform byte |
| 804 | comparison here. */ |
| 805 | |
| 806 | set_end = set1 + 32; |
| 807 | if (invert_bits) |
| 808 | { |
| 809 | do |
| 810 | { |
| 811 | if ((*set1++ & ~(*set2++)) != 0) return FALSE; |
| 812 | } |
| 813 | while (set1 < set_end); |
| 814 | } |
| 815 | else |
| 816 | { |
| 817 | do |
| 818 | { |
| 819 | if ((*set1++ & *set2++) != 0) return FALSE; |
| 820 | } |
| 821 | while (set1 < set_end); |
| 822 | } |
| 823 | |
| 824 | if (list[1] == 0) return TRUE; |
| 825 | /* Might be an empty repeat. */ |
| 826 | continue; |
| 827 | } |
| 828 | |
| 829 | /* Some property combinations also acceptable. Unicode property opcodes are |
| 830 | processed specially; the rest can be handled with a lookup table. */ |
| 831 | |
| 832 | else |
| 833 | { |
| 834 | uint32_t leftop, rightop; |
| 835 | |
| 836 | leftop = base_list[0]; |
| 837 | rightop = list[0]; |
| 838 | |
| 839 | #ifdef SUPPORT_UNICODE |
| 840 | accepted = FALSE; /* Always set in non-unicode case. */ |
| 841 | if (leftop == OP_PROP || leftop == OP_NOTPROP) |
| 842 | { |
| 843 | if (rightop == OP_EOD) |
| 844 | accepted = TRUE; |
| 845 | else if (rightop == OP_PROP || rightop == OP_NOTPROP) |
| 846 | { |
| 847 | int n; |
| 848 | const uint8_t *p; |
| 849 | BOOL same = leftop == rightop; |
| 850 | BOOL lisprop = leftop == OP_PROP; |
| 851 | BOOL risprop = rightop == OP_PROP; |
| 852 | BOOL bothprop = lisprop && risprop; |
| 853 | |
| 854 | /* There's a table that specifies how each combination is to be |
| 855 | processed: |
| 856 | 0 Always return FALSE (never auto-possessify) |
| 857 | 1 Character groups are distinct (possessify if both are OP_PROP) |
| 858 | 2 Check character categories in the same group (general or particular) |
| 859 | 3 Return TRUE if the two opcodes are not the same |
| 860 | ... see comments below |
| 861 | */ |
| 862 | |
| 863 | n = propposstab[base_list[2]][list[2]]; |
| 864 | switch(n) |
| 865 | { |
| 866 | case 0: break; |
| 867 | case 1: accepted = bothprop; break; |
| 868 | case 2: accepted = (base_list[3] == list[3]) != same; break; |
| 869 | case 3: accepted = !same; break; |
| 870 | |
| 871 | case 4: /* Left general category, right particular category */ |
| 872 | accepted = risprop && catposstab[base_list[3]][list[3]] == same; |
| 873 | break; |
| 874 | |
| 875 | case 5: /* Right general category, left particular category */ |
| 876 | accepted = lisprop && catposstab[list[3]][base_list[3]] == same; |
| 877 | break; |
| 878 | |
| 879 | /* This code is logically tricky. Think hard before fiddling with it. |
| 880 | The posspropstab table has four entries per row. Each row relates to |
| 881 | one of PCRE's special properties such as ALNUM or SPACE or WORD. |
| 882 | Only WORD actually needs all four entries, but using repeats for the |
| 883 | others means they can all use the same code below. |
| 884 | |
| 885 | The first two entries in each row are Unicode general categories, and |
| 886 | apply always, because all the characters they include are part of the |
| 887 | PCRE character set. The third and fourth entries are a general and a |
| 888 | particular category, respectively, that include one or more relevant |
| 889 | characters. One or the other is used, depending on whether the check |
| 890 | is for a general or a particular category. However, in both cases the |
| 891 | category contains more characters than the specials that are defined |
| 892 | for the property being tested against. Therefore, it cannot be used |
| 893 | in a NOTPROP case. |
| 894 | |
| 895 | Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po. |
| 896 | Underscore is covered by ucp_P or ucp_Po. */ |
| 897 | |
| 898 | case 6: /* Left alphanum vs right general category */ |
| 899 | case 7: /* Left space vs right general category */ |
| 900 | case 8: /* Left word vs right general category */ |
| 901 | p = posspropstab[n-6]; |
| 902 | accepted = risprop && lisprop == |
| 903 | (list[3] != p[0] && |
| 904 | list[3] != p[1] && |
| 905 | (list[3] != p[2] || !lisprop)); |
| 906 | break; |
| 907 | |
| 908 | case 9: /* Right alphanum vs left general category */ |
| 909 | case 10: /* Right space vs left general category */ |
| 910 | case 11: /* Right word vs left general category */ |
| 911 | p = posspropstab[n-9]; |
| 912 | accepted = lisprop && risprop == |
| 913 | (base_list[3] != p[0] && |
| 914 | base_list[3] != p[1] && |
| 915 | (base_list[3] != p[2] || !risprop)); |
| 916 | break; |
| 917 | |
| 918 | case 12: /* Left alphanum vs right particular category */ |
| 919 | case 13: /* Left space vs right particular category */ |
| 920 | case 14: /* Left word vs right particular category */ |
| 921 | p = posspropstab[n-12]; |
| 922 | accepted = risprop && lisprop == |
| 923 | (catposstab[p[0]][list[3]] && |
| 924 | catposstab[p[1]][list[3]] && |
| 925 | (list[3] != p[3] || !lisprop)); |
| 926 | break; |
| 927 | |
| 928 | case 15: /* Right alphanum vs left particular category */ |
| 929 | case 16: /* Right space vs left particular category */ |
| 930 | case 17: /* Right word vs left particular category */ |
| 931 | p = posspropstab[n-15]; |
| 932 | accepted = lisprop && risprop == |
| 933 | (catposstab[p[0]][base_list[3]] && |
| 934 | catposstab[p[1]][base_list[3]] && |
| 935 | (base_list[3] != p[3] || !risprop)); |
| 936 | break; |
| 937 | } |
| 938 | } |
| 939 | } |
| 940 | |
| 941 | else |
| 942 | #endif /* SUPPORT_UNICODE */ |
| 943 | |
| 944 | accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP && |
| 945 | rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP && |
| 946 | autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP]; |
| 947 | |
| 948 | if (!accepted) return FALSE; |
| 949 | |
| 950 | if (list[1] == 0) return TRUE; |
| 951 | /* Might be an empty repeat. */ |
| 952 | continue; |
| 953 | } |
| 954 | |
| 955 | /* Control reaches here only if one of the items is a small character list. |
| 956 | All characters are checked against the other side. */ |
| 957 | |
| 958 | do |
| 959 | { |
| 960 | chr = *chr_ptr; |
| 961 | |
| 962 | switch(list_ptr[0]) |
| 963 | { |
| 964 | case OP_CHAR: |
| 965 | ochr_ptr = list_ptr + 2; |
| 966 | do |
| 967 | { |
| 968 | if (chr == *ochr_ptr) return FALSE; |
| 969 | ochr_ptr++; |
| 970 | } |
| 971 | while(*ochr_ptr != NOTACHAR); |
| 972 | break; |
| 973 | |
| 974 | case OP_NOT: |
| 975 | ochr_ptr = list_ptr + 2; |
| 976 | do |
| 977 | { |
| 978 | if (chr == *ochr_ptr) |
| 979 | break; |
| 980 | ochr_ptr++; |
| 981 | } |
| 982 | while(*ochr_ptr != NOTACHAR); |
| 983 | if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */ |
| 984 | break; |
| 985 | |
| 986 | /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not* |
| 987 | set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ |
| 988 | |
| 989 | case OP_DIGIT: |
| 990 | if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE; |
| 991 | break; |
| 992 | |
| 993 | case OP_NOT_DIGIT: |
| 994 | if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE; |
| 995 | break; |
| 996 | |
| 997 | case OP_WHITESPACE: |
| 998 | if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE; |
| 999 | break; |
| 1000 | |
| 1001 | case OP_NOT_WHITESPACE: |
| 1002 | if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE; |
| 1003 | break; |
| 1004 | |
| 1005 | case OP_WORDCHAR: |
| 1006 | if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE; |
| 1007 | break; |
| 1008 | |
| 1009 | case OP_NOT_WORDCHAR: |
| 1010 | if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE; |
| 1011 | break; |
| 1012 | |
| 1013 | case OP_HSPACE: |
| 1014 | switch(chr) |
| 1015 | { |
| 1016 | HSPACE_CASES: return FALSE; |
| 1017 | default: break; |
| 1018 | } |
| 1019 | break; |
| 1020 | |
| 1021 | case OP_NOT_HSPACE: |
| 1022 | switch(chr) |
| 1023 | { |
| 1024 | HSPACE_CASES: break; |
| 1025 | default: return FALSE; |
| 1026 | } |
| 1027 | break; |
| 1028 | |
| 1029 | case OP_ANYNL: |
| 1030 | case OP_VSPACE: |
| 1031 | switch(chr) |
| 1032 | { |
| 1033 | VSPACE_CASES: return FALSE; |
| 1034 | default: break; |
| 1035 | } |
| 1036 | break; |
| 1037 | |
| 1038 | case OP_NOT_VSPACE: |
| 1039 | switch(chr) |
| 1040 | { |
| 1041 | VSPACE_CASES: break; |
| 1042 | default: return FALSE; |
| 1043 | } |
| 1044 | break; |
| 1045 | |
| 1046 | case OP_DOLL: |
| 1047 | case OP_EODN: |
| 1048 | switch (chr) |
| 1049 | { |
| 1050 | case CHAR_CR: |
| 1051 | case CHAR_LF: |
| 1052 | case CHAR_VT: |
| 1053 | case CHAR_FF: |
| 1054 | case CHAR_NEL: |
| 1055 | #ifndef EBCDIC |
| 1056 | case 0x2028: |
| 1057 | case 0x2029: |
| 1058 | #endif /* Not EBCDIC */ |
| 1059 | return FALSE; |
| 1060 | } |
| 1061 | break; |
| 1062 | |
| 1063 | case OP_EOD: /* Can always possessify before \z */ |
| 1064 | break; |
| 1065 | |
| 1066 | #ifdef SUPPORT_UNICODE |
| 1067 | case OP_PROP: |
| 1068 | case OP_NOTPROP: |
| 1069 | if (!check_char_prop(chr, list_ptr[2], list_ptr[3], |
| 1070 | list_ptr[0] == OP_NOTPROP)) |
| 1071 | return FALSE; |
| 1072 | break; |
| 1073 | #endif |
| 1074 | |
| 1075 | case OP_NCLASS: |
| 1076 | if (chr > 255) return FALSE; |
| 1077 | /* Fall through */ |
| 1078 | |
| 1079 | case OP_CLASS: |
| 1080 | if (chr > 255) break; |
| 1081 | class_bitset = (uint8_t *) |
| 1082 | ((list_ptr == list ? code : base_end) - list_ptr[2]); |
Elliott Hughes | 0c26e19 | 2019-08-07 12:24:46 -0700 | [diff] [blame] | 1083 | if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE; |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1084 | break; |
| 1085 | |
| 1086 | #ifdef SUPPORT_WIDE_CHARS |
| 1087 | case OP_XCLASS: |
| 1088 | if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) - |
| 1089 | list_ptr[2] + LINK_SIZE, utf)) return FALSE; |
| 1090 | break; |
| 1091 | #endif |
| 1092 | |
| 1093 | default: |
| 1094 | return FALSE; |
| 1095 | } |
| 1096 | |
| 1097 | chr_ptr++; |
| 1098 | } |
| 1099 | while(*chr_ptr != NOTACHAR); |
| 1100 | |
| 1101 | /* At least one character must be matched from this opcode. */ |
| 1102 | |
| 1103 | if (list[1] == 0) return TRUE; |
| 1104 | } |
| 1105 | |
| 1106 | /* Control never reaches here. There used to be a fail-save return FALSE; here, |
| 1107 | but some compilers complain about an unreachable statement. */ |
| 1108 | } |
| 1109 | |
| 1110 | |
| 1111 | |
| 1112 | /************************************************* |
| 1113 | * Scan compiled regex for auto-possession * |
| 1114 | *************************************************/ |
| 1115 | |
| 1116 | /* Replaces single character iterations with their possessive alternatives |
| 1117 | if appropriate. This function modifies the compiled opcode! Hitting a |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 1118 | non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a |
| 1119 | bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches |
| 1120 | overly complicated or large patterns. In these cases, the check just stops, |
| 1121 | leaving the remainder of the pattern unpossessified. |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1122 | |
| 1123 | Arguments: |
| 1124 | code points to start of the byte code |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1125 | cb compile data block |
| 1126 | |
| 1127 | Returns: 0 for success |
| 1128 | -1 if a non-existant opcode is encountered |
| 1129 | */ |
| 1130 | |
| 1131 | int |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 1132 | PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb) |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1133 | { |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 1134 | PCRE2_UCHAR c; |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1135 | PCRE2_SPTR end; |
| 1136 | PCRE2_UCHAR *repeat_opcode; |
| 1137 | uint32_t list[8]; |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 1138 | int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */ |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 1139 | BOOL utf = (cb->external_options & PCRE2_UTF) != 0; |
| 1140 | BOOL ucp = (cb->external_options & PCRE2_UCP) != 0; |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1141 | |
| 1142 | for (;;) |
| 1143 | { |
| 1144 | c = *code; |
| 1145 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame] | 1146 | if (c >= OP_TABLE_LENGTH) return -1; /* Something gone wrong */ |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1147 | |
| 1148 | if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) |
| 1149 | { |
| 1150 | c -= get_repeat_base(c) - OP_STAR; |
| 1151 | end = (c <= OP_MINUPTO) ? |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 1152 | get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL; |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1153 | list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO; |
| 1154 | |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 1155 | if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end, |
| 1156 | &rec_limit)) |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1157 | { |
| 1158 | switch(c) |
| 1159 | { |
| 1160 | case OP_STAR: |
| 1161 | *code += OP_POSSTAR - OP_STAR; |
| 1162 | break; |
| 1163 | |
| 1164 | case OP_MINSTAR: |
| 1165 | *code += OP_POSSTAR - OP_MINSTAR; |
| 1166 | break; |
| 1167 | |
| 1168 | case OP_PLUS: |
| 1169 | *code += OP_POSPLUS - OP_PLUS; |
| 1170 | break; |
| 1171 | |
| 1172 | case OP_MINPLUS: |
| 1173 | *code += OP_POSPLUS - OP_MINPLUS; |
| 1174 | break; |
| 1175 | |
| 1176 | case OP_QUERY: |
| 1177 | *code += OP_POSQUERY - OP_QUERY; |
| 1178 | break; |
| 1179 | |
| 1180 | case OP_MINQUERY: |
| 1181 | *code += OP_POSQUERY - OP_MINQUERY; |
| 1182 | break; |
| 1183 | |
| 1184 | case OP_UPTO: |
| 1185 | *code += OP_POSUPTO - OP_UPTO; |
| 1186 | break; |
| 1187 | |
| 1188 | case OP_MINUPTO: |
| 1189 | *code += OP_POSUPTO - OP_MINUPTO; |
| 1190 | break; |
| 1191 | } |
| 1192 | } |
| 1193 | c = *code; |
| 1194 | } |
| 1195 | else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS) |
| 1196 | { |
| 1197 | #ifdef SUPPORT_WIDE_CHARS |
| 1198 | if (c == OP_XCLASS) |
| 1199 | repeat_opcode = code + GET(code, 1); |
| 1200 | else |
| 1201 | #endif |
| 1202 | repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR)); |
| 1203 | |
| 1204 | c = *repeat_opcode; |
| 1205 | if (c >= OP_CRSTAR && c <= OP_CRMINRANGE) |
| 1206 | { |
Elliott Hughes | 378b175 | 2021-06-08 13:42:40 -0700 | [diff] [blame] | 1207 | /* The return from get_chr_property_list() will never be NULL when |
| 1208 | *code (aka c) is one of the three class opcodes. However, gcc with |
| 1209 | -fanalyzer notes that a NULL return is possible, and grumbles. Hence we |
| 1210 | put in a check. */ |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1211 | |
Elliott Hughes | 378b175 | 2021-06-08 13:42:40 -0700 | [diff] [blame] | 1212 | end = get_chr_property_list(code, utf, ucp, cb->fcc, list); |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1213 | list[1] = (c & 1) == 0; |
| 1214 | |
Elliott Hughes | 378b175 | 2021-06-08 13:42:40 -0700 | [diff] [blame] | 1215 | if (end != NULL && |
| 1216 | compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit)) |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1217 | { |
| 1218 | switch (c) |
| 1219 | { |
| 1220 | case OP_CRSTAR: |
| 1221 | case OP_CRMINSTAR: |
| 1222 | *repeat_opcode = OP_CRPOSSTAR; |
| 1223 | break; |
| 1224 | |
| 1225 | case OP_CRPLUS: |
| 1226 | case OP_CRMINPLUS: |
| 1227 | *repeat_opcode = OP_CRPOSPLUS; |
| 1228 | break; |
| 1229 | |
| 1230 | case OP_CRQUERY: |
| 1231 | case OP_CRMINQUERY: |
| 1232 | *repeat_opcode = OP_CRPOSQUERY; |
| 1233 | break; |
| 1234 | |
| 1235 | case OP_CRRANGE: |
| 1236 | case OP_CRMINRANGE: |
| 1237 | *repeat_opcode = OP_CRPOSRANGE; |
| 1238 | break; |
| 1239 | } |
| 1240 | } |
| 1241 | } |
| 1242 | c = *code; |
| 1243 | } |
| 1244 | |
| 1245 | switch(c) |
| 1246 | { |
| 1247 | case OP_END: |
| 1248 | return 0; |
| 1249 | |
| 1250 | case OP_TYPESTAR: |
| 1251 | case OP_TYPEMINSTAR: |
| 1252 | case OP_TYPEPLUS: |
| 1253 | case OP_TYPEMINPLUS: |
| 1254 | case OP_TYPEQUERY: |
| 1255 | case OP_TYPEMINQUERY: |
| 1256 | case OP_TYPEPOSSTAR: |
| 1257 | case OP_TYPEPOSPLUS: |
| 1258 | case OP_TYPEPOSQUERY: |
| 1259 | if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; |
| 1260 | break; |
| 1261 | |
| 1262 | case OP_TYPEUPTO: |
| 1263 | case OP_TYPEMINUPTO: |
| 1264 | case OP_TYPEEXACT: |
| 1265 | case OP_TYPEPOSUPTO: |
| 1266 | if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) |
| 1267 | code += 2; |
| 1268 | break; |
| 1269 | |
| 1270 | case OP_CALLOUT_STR: |
| 1271 | code += GET(code, 1 + 2*LINK_SIZE); |
| 1272 | break; |
| 1273 | |
| 1274 | #ifdef SUPPORT_WIDE_CHARS |
| 1275 | case OP_XCLASS: |
| 1276 | code += GET(code, 1); |
| 1277 | break; |
| 1278 | #endif |
| 1279 | |
| 1280 | case OP_MARK: |
Elliott Hughes | 653c210 | 2019-01-09 15:41:36 -0800 | [diff] [blame] | 1281 | case OP_COMMIT_ARG: |
Janis Danisevskis | 53e448c | 2016-03-31 13:35:25 +0100 | [diff] [blame] | 1282 | case OP_PRUNE_ARG: |
| 1283 | case OP_SKIP_ARG: |
| 1284 | case OP_THEN_ARG: |
| 1285 | code += code[1]; |
| 1286 | break; |
| 1287 | } |
| 1288 | |
| 1289 | /* Add in the fixed length from the table */ |
| 1290 | |
| 1291 | code += PRIV(OP_lengths)[c]; |
| 1292 | |
| 1293 | /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be |
| 1294 | followed by a multi-byte character. The length in the table is a minimum, so |
| 1295 | we have to arrange to skip the extra code units. */ |
| 1296 | |
| 1297 | #ifdef MAYBE_UTF_MULTI |
| 1298 | if (utf) switch(c) |
| 1299 | { |
| 1300 | case OP_CHAR: |
| 1301 | case OP_CHARI: |
| 1302 | case OP_NOT: |
| 1303 | case OP_NOTI: |
| 1304 | case OP_STAR: |
| 1305 | case OP_MINSTAR: |
| 1306 | case OP_PLUS: |
| 1307 | case OP_MINPLUS: |
| 1308 | case OP_QUERY: |
| 1309 | case OP_MINQUERY: |
| 1310 | case OP_UPTO: |
| 1311 | case OP_MINUPTO: |
| 1312 | case OP_EXACT: |
| 1313 | case OP_POSSTAR: |
| 1314 | case OP_POSPLUS: |
| 1315 | case OP_POSQUERY: |
| 1316 | case OP_POSUPTO: |
| 1317 | case OP_STARI: |
| 1318 | case OP_MINSTARI: |
| 1319 | case OP_PLUSI: |
| 1320 | case OP_MINPLUSI: |
| 1321 | case OP_QUERYI: |
| 1322 | case OP_MINQUERYI: |
| 1323 | case OP_UPTOI: |
| 1324 | case OP_MINUPTOI: |
| 1325 | case OP_EXACTI: |
| 1326 | case OP_POSSTARI: |
| 1327 | case OP_POSPLUSI: |
| 1328 | case OP_POSQUERYI: |
| 1329 | case OP_POSUPTOI: |
| 1330 | case OP_NOTSTAR: |
| 1331 | case OP_NOTMINSTAR: |
| 1332 | case OP_NOTPLUS: |
| 1333 | case OP_NOTMINPLUS: |
| 1334 | case OP_NOTQUERY: |
| 1335 | case OP_NOTMINQUERY: |
| 1336 | case OP_NOTUPTO: |
| 1337 | case OP_NOTMINUPTO: |
| 1338 | case OP_NOTEXACT: |
| 1339 | case OP_NOTPOSSTAR: |
| 1340 | case OP_NOTPOSPLUS: |
| 1341 | case OP_NOTPOSQUERY: |
| 1342 | case OP_NOTPOSUPTO: |
| 1343 | case OP_NOTSTARI: |
| 1344 | case OP_NOTMINSTARI: |
| 1345 | case OP_NOTPLUSI: |
| 1346 | case OP_NOTMINPLUSI: |
| 1347 | case OP_NOTQUERYI: |
| 1348 | case OP_NOTMINQUERYI: |
| 1349 | case OP_NOTUPTOI: |
| 1350 | case OP_NOTMINUPTOI: |
| 1351 | case OP_NOTEXACTI: |
| 1352 | case OP_NOTPOSSTARI: |
| 1353 | case OP_NOTPOSPLUSI: |
| 1354 | case OP_NOTPOSQUERYI: |
| 1355 | case OP_NOTPOSUPTOI: |
| 1356 | if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); |
| 1357 | break; |
| 1358 | } |
| 1359 | #else |
| 1360 | (void)(utf); /* Keep compiler happy by referencing function argument */ |
| 1361 | #endif /* SUPPORT_WIDE_CHARS */ |
| 1362 | } |
| 1363 | } |
| 1364 | |
| 1365 | /* End of pcre2_auto_possess.c */ |