| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1 | /* | 
 | 2 |  * Secret Labs' Regular Expression Engine | 
 | 3 |  * | 
 | 4 |  * regular expression matching engine | 
 | 5 |  * | 
 | 6 |  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved. | 
 | 7 |  * | 
 | 8 |  * See the _sre.c file for information on usage and redistribution. | 
 | 9 |  */ | 
 | 10 |  | 
 | 11 | /* String matching engine */ | 
 | 12 |  | 
 | 13 | /* This file is included three times, with different character settings */ | 
 | 14 |  | 
 | 15 | LOCAL(int) | 
 | 16 | SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) | 
 | 17 | { | 
 | 18 |     /* check if pointer is at given position */ | 
 | 19 |  | 
 | 20 |     Py_ssize_t thisp, thatp; | 
 | 21 |  | 
 | 22 |     switch (at) { | 
 | 23 |  | 
 | 24 |     case SRE_AT_BEGINNING: | 
 | 25 |     case SRE_AT_BEGINNING_STRING: | 
 | 26 |         return ((void*) ptr == state->beginning); | 
 | 27 |  | 
 | 28 |     case SRE_AT_BEGINNING_LINE: | 
 | 29 |         return ((void*) ptr == state->beginning || | 
 | 30 |                 SRE_IS_LINEBREAK((int) ptr[-1])); | 
 | 31 |  | 
 | 32 |     case SRE_AT_END: | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 33 |         return (((SRE_CHAR *)state->end - ptr == 1 && | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 34 |                  SRE_IS_LINEBREAK((int) ptr[0])) || | 
 | 35 |                 ((void*) ptr == state->end)); | 
 | 36 |  | 
 | 37 |     case SRE_AT_END_LINE: | 
 | 38 |         return ((void*) ptr == state->end || | 
 | 39 |                 SRE_IS_LINEBREAK((int) ptr[0])); | 
 | 40 |  | 
 | 41 |     case SRE_AT_END_STRING: | 
 | 42 |         return ((void*) ptr == state->end); | 
 | 43 |  | 
 | 44 |     case SRE_AT_BOUNDARY: | 
 | 45 |         if (state->beginning == state->end) | 
 | 46 |             return 0; | 
 | 47 |         thatp = ((void*) ptr > state->beginning) ? | 
 | 48 |             SRE_IS_WORD((int) ptr[-1]) : 0; | 
 | 49 |         thisp = ((void*) ptr < state->end) ? | 
 | 50 |             SRE_IS_WORD((int) ptr[0]) : 0; | 
 | 51 |         return thisp != thatp; | 
 | 52 |  | 
 | 53 |     case SRE_AT_NON_BOUNDARY: | 
 | 54 |         if (state->beginning == state->end) | 
 | 55 |             return 0; | 
 | 56 |         thatp = ((void*) ptr > state->beginning) ? | 
 | 57 |             SRE_IS_WORD((int) ptr[-1]) : 0; | 
 | 58 |         thisp = ((void*) ptr < state->end) ? | 
 | 59 |             SRE_IS_WORD((int) ptr[0]) : 0; | 
 | 60 |         return thisp == thatp; | 
 | 61 |  | 
 | 62 |     case SRE_AT_LOC_BOUNDARY: | 
 | 63 |         if (state->beginning == state->end) | 
 | 64 |             return 0; | 
 | 65 |         thatp = ((void*) ptr > state->beginning) ? | 
 | 66 |             SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 
 | 67 |         thisp = ((void*) ptr < state->end) ? | 
 | 68 |             SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 
 | 69 |         return thisp != thatp; | 
 | 70 |  | 
 | 71 |     case SRE_AT_LOC_NON_BOUNDARY: | 
 | 72 |         if (state->beginning == state->end) | 
 | 73 |             return 0; | 
 | 74 |         thatp = ((void*) ptr > state->beginning) ? | 
 | 75 |             SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 
 | 76 |         thisp = ((void*) ptr < state->end) ? | 
 | 77 |             SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 
 | 78 |         return thisp == thatp; | 
 | 79 |  | 
 | 80 |     case SRE_AT_UNI_BOUNDARY: | 
 | 81 |         if (state->beginning == state->end) | 
 | 82 |             return 0; | 
 | 83 |         thatp = ((void*) ptr > state->beginning) ? | 
 | 84 |             SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 
 | 85 |         thisp = ((void*) ptr < state->end) ? | 
 | 86 |             SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 
 | 87 |         return thisp != thatp; | 
 | 88 |  | 
 | 89 |     case SRE_AT_UNI_NON_BOUNDARY: | 
 | 90 |         if (state->beginning == state->end) | 
 | 91 |             return 0; | 
 | 92 |         thatp = ((void*) ptr > state->beginning) ? | 
 | 93 |             SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 
 | 94 |         thisp = ((void*) ptr < state->end) ? | 
 | 95 |             SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 
 | 96 |         return thisp == thatp; | 
 | 97 |  | 
 | 98 |     } | 
 | 99 |  | 
 | 100 |     return 0; | 
 | 101 | } | 
 | 102 |  | 
 | 103 | LOCAL(int) | 
| Serhiy Storchaka | 4b8f894 | 2014-10-31 12:36:56 +0200 | [diff] [blame] | 104 | SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 105 | { | 
 | 106 |     /* check if character is a member of the given set */ | 
 | 107 |  | 
 | 108 |     int ok = 1; | 
 | 109 |  | 
 | 110 |     for (;;) { | 
 | 111 |         switch (*set++) { | 
 | 112 |  | 
 | 113 |         case SRE_OP_FAILURE: | 
 | 114 |             return !ok; | 
 | 115 |  | 
 | 116 |         case SRE_OP_LITERAL: | 
 | 117 |             /* <LITERAL> <code> */ | 
 | 118 |             if (ch == set[0]) | 
 | 119 |                 return ok; | 
 | 120 |             set++; | 
 | 121 |             break; | 
 | 122 |  | 
 | 123 |         case SRE_OP_CATEGORY: | 
 | 124 |             /* <CATEGORY> <code> */ | 
 | 125 |             if (sre_category(set[0], (int) ch)) | 
 | 126 |                 return ok; | 
 | 127 |             set++; | 
 | 128 |             break; | 
 | 129 |  | 
 | 130 |         case SRE_OP_CHARSET: | 
 | 131 |             /* <CHARSET> <bitmap> */ | 
 | 132 |             if (ch < 256 && | 
 | 133 |                 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) | 
 | 134 |                 return ok; | 
 | 135 |             set += 256/SRE_CODE_BITS; | 
 | 136 |             break; | 
 | 137 |  | 
 | 138 |         case SRE_OP_RANGE: | 
 | 139 |             /* <RANGE> <lower> <upper> */ | 
 | 140 |             if (set[0] <= ch && ch <= set[1]) | 
 | 141 |                 return ok; | 
 | 142 |             set += 2; | 
 | 143 |             break; | 
 | 144 |  | 
| Serhiy Storchaka | 4b8f894 | 2014-10-31 12:36:56 +0200 | [diff] [blame] | 145 |         case SRE_OP_RANGE_IGNORE: | 
 | 146 |             /* <RANGE_IGNORE> <lower> <upper> */ | 
 | 147 |         { | 
 | 148 |             SRE_CODE uch; | 
 | 149 |             /* ch is already lower cased */ | 
 | 150 |             if (set[0] <= ch && ch <= set[1]) | 
 | 151 |                 return ok; | 
 | 152 |             uch = state->upper(ch); | 
 | 153 |             if (set[0] <= uch && uch <= set[1]) | 
 | 154 |                 return ok; | 
 | 155 |             set += 2; | 
 | 156 |             break; | 
 | 157 |         } | 
 | 158 |  | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 159 |         case SRE_OP_NEGATE: | 
 | 160 |             ok = !ok; | 
 | 161 |             break; | 
 | 162 |  | 
 | 163 |         case SRE_OP_BIGCHARSET: | 
 | 164 |             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ | 
 | 165 |         { | 
 | 166 |             Py_ssize_t count, block; | 
 | 167 |             count = *(set++); | 
 | 168 |  | 
 | 169 |             if (ch < 0x10000u) | 
 | 170 |                 block = ((unsigned char*)set)[ch >> 8]; | 
 | 171 |             else | 
 | 172 |                 block = -1; | 
 | 173 |             set += 256/sizeof(SRE_CODE); | 
 | 174 |             if (block >=0 && | 
 | 175 |                 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & | 
 | 176 |                     (1u << (ch & (SRE_CODE_BITS-1))))) | 
 | 177 |                 return ok; | 
 | 178 |             set += count * (256/SRE_CODE_BITS); | 
 | 179 |             break; | 
 | 180 |         } | 
 | 181 |  | 
 | 182 |         default: | 
 | 183 |             /* internal error -- there's not much we can do about it | 
 | 184 |                here, so let's just pretend it didn't match... */ | 
 | 185 |             return 0; | 
 | 186 |         } | 
 | 187 |     } | 
 | 188 | } | 
 | 189 |  | 
| Serhiy Storchaka | 429b59e | 2014-05-14 21:48:17 +0300 | [diff] [blame] | 190 | LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all); | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 191 |  | 
 | 192 | LOCAL(Py_ssize_t) | 
 | 193 | SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) | 
 | 194 | { | 
 | 195 |     SRE_CODE chr; | 
 | 196 |     SRE_CHAR c; | 
 | 197 |     SRE_CHAR* ptr = (SRE_CHAR *)state->ptr; | 
 | 198 |     SRE_CHAR* end = (SRE_CHAR *)state->end; | 
 | 199 |     Py_ssize_t i; | 
 | 200 |  | 
 | 201 |     /* adjust end */ | 
 | 202 |     if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) | 
 | 203 |         end = ptr + maxcount; | 
 | 204 |  | 
 | 205 |     switch (pattern[0]) { | 
 | 206 |  | 
 | 207 |     case SRE_OP_IN: | 
 | 208 |         /* repeated set */ | 
 | 209 |         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); | 
| Serhiy Storchaka | 4b8f894 | 2014-10-31 12:36:56 +0200 | [diff] [blame] | 210 |         while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 211 |             ptr++; | 
 | 212 |         break; | 
 | 213 |  | 
 | 214 |     case SRE_OP_ANY: | 
 | 215 |         /* repeated dot wildcard. */ | 
 | 216 |         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); | 
 | 217 |         while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) | 
 | 218 |             ptr++; | 
 | 219 |         break; | 
 | 220 |  | 
 | 221 |     case SRE_OP_ANY_ALL: | 
 | 222 |         /* repeated dot wildcard.  skip to the end of the target | 
 | 223 |            string, and backtrack from there */ | 
 | 224 |         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); | 
 | 225 |         ptr = end; | 
 | 226 |         break; | 
 | 227 |  | 
 | 228 |     case SRE_OP_LITERAL: | 
 | 229 |         /* repeated literal */ | 
 | 230 |         chr = pattern[1]; | 
 | 231 |         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); | 
 | 232 |         c = (SRE_CHAR) chr; | 
 | 233 | #if SIZEOF_SRE_CHAR < 4 | 
 | 234 |         if ((SRE_CODE) c != chr) | 
 | 235 |             ; /* literal can't match: doesn't fit in char width */ | 
 | 236 |         else | 
 | 237 | #endif | 
 | 238 |         while (ptr < end && *ptr == c) | 
 | 239 |             ptr++; | 
 | 240 |         break; | 
 | 241 |  | 
 | 242 |     case SRE_OP_LITERAL_IGNORE: | 
 | 243 |         /* repeated literal */ | 
 | 244 |         chr = pattern[1]; | 
 | 245 |         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 
 | 246 |         while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr) | 
 | 247 |             ptr++; | 
 | 248 |         break; | 
 | 249 |  | 
 | 250 |     case SRE_OP_NOT_LITERAL: | 
 | 251 |         /* repeated non-literal */ | 
 | 252 |         chr = pattern[1]; | 
 | 253 |         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); | 
 | 254 |         c = (SRE_CHAR) chr; | 
 | 255 | #if SIZEOF_SRE_CHAR < 4 | 
 | 256 |         if ((SRE_CODE) c != chr) | 
 | 257 |             ptr = end; /* literal can't match: doesn't fit in char width */ | 
 | 258 |         else | 
 | 259 | #endif | 
 | 260 |         while (ptr < end && *ptr != c) | 
 | 261 |             ptr++; | 
 | 262 |         break; | 
 | 263 |  | 
 | 264 |     case SRE_OP_NOT_LITERAL_IGNORE: | 
 | 265 |         /* repeated non-literal */ | 
 | 266 |         chr = pattern[1]; | 
 | 267 |         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 
 | 268 |         while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr) | 
 | 269 |             ptr++; | 
 | 270 |         break; | 
 | 271 |  | 
 | 272 |     default: | 
 | 273 |         /* repeated single character pattern */ | 
 | 274 |         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); | 
 | 275 |         while ((SRE_CHAR*) state->ptr < end) { | 
| Serhiy Storchaka | 429b59e | 2014-05-14 21:48:17 +0300 | [diff] [blame] | 276 |             i = SRE(match)(state, pattern, 0); | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 277 |             if (i < 0) | 
 | 278 |                 return i; | 
 | 279 |             if (!i) | 
 | 280 |                 break; | 
 | 281 |         } | 
 | 282 |         TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, | 
 | 283 |                (SRE_CHAR*) state->ptr - ptr)); | 
 | 284 |         return (SRE_CHAR*) state->ptr - ptr; | 
 | 285 |     } | 
 | 286 |  | 
 | 287 |     TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, | 
 | 288 |            ptr - (SRE_CHAR*) state->ptr)); | 
 | 289 |     return ptr - (SRE_CHAR*) state->ptr; | 
 | 290 | } | 
 | 291 |  | 
 | 292 | #if 0 /* not used in this release */ | 
 | 293 | LOCAL(int) | 
 | 294 | SRE(info)(SRE_STATE* state, SRE_CODE* pattern) | 
 | 295 | { | 
 | 296 |     /* check if an SRE_OP_INFO block matches at the current position. | 
 | 297 |        returns the number of SRE_CODE objects to skip if successful, 0 | 
 | 298 |        if no match */ | 
 | 299 |  | 
 | 300 |     SRE_CHAR* end = (SRE_CHAR*) state->end; | 
 | 301 |     SRE_CHAR* ptr = (SRE_CHAR*) state->ptr; | 
 | 302 |     Py_ssize_t i; | 
 | 303 |  | 
 | 304 |     /* check minimal length */ | 
 | 305 |     if (pattern[3] && end - ptr < pattern[3]) | 
 | 306 |         return 0; | 
 | 307 |  | 
 | 308 |     /* check known prefix */ | 
 | 309 |     if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) { | 
 | 310 |         /* <length> <skip> <prefix data> <overlap data> */ | 
 | 311 |         for (i = 0; i < pattern[5]; i++) | 
 | 312 |             if ((SRE_CODE) ptr[i] != pattern[7 + i]) | 
 | 313 |                 return 0; | 
 | 314 |         return pattern[0] + 2 * pattern[6]; | 
 | 315 |     } | 
 | 316 |     return pattern[0]; | 
 | 317 | } | 
 | 318 | #endif | 
 | 319 |  | 
 | 320 | /* The macros below should be used to protect recursive SRE(match)() | 
 | 321 |  * calls that *failed* and do *not* return immediately (IOW, those | 
 | 322 |  * that will backtrack). Explaining: | 
 | 323 |  * | 
 | 324 |  * - Recursive SRE(match)() returned true: that's usually a success | 
 | 325 |  *   (besides atypical cases like ASSERT_NOT), therefore there's no | 
 | 326 |  *   reason to restore lastmark; | 
 | 327 |  * | 
 | 328 |  * - Recursive SRE(match)() returned false but the current SRE(match)() | 
 | 329 |  *   is returning to the caller: If the current SRE(match)() is the | 
 | 330 |  *   top function of the recursion, returning false will be a matching | 
 | 331 |  *   failure, and it doesn't matter where lastmark is pointing to. | 
 | 332 |  *   If it's *not* the top function, it will be a recursive SRE(match)() | 
 | 333 |  *   failure by itself, and the calling SRE(match)() will have to deal | 
 | 334 |  *   with the failure by the same rules explained here (it will restore | 
 | 335 |  *   lastmark by itself if necessary); | 
 | 336 |  * | 
 | 337 |  * - Recursive SRE(match)() returned false, and will continue the | 
 | 338 |  *   outside 'for' loop: must be protected when breaking, since the next | 
 | 339 |  *   OP could potentially depend on lastmark; | 
 | 340 |  * | 
 | 341 |  * - Recursive SRE(match)() returned false, and will be called again | 
 | 342 |  *   inside a local for/while loop: must be protected between each | 
 | 343 |  *   loop iteration, since the recursive SRE(match)() could do anything, | 
 | 344 |  *   and could potentially depend on lastmark. | 
 | 345 |  * | 
 | 346 |  * For more information, check the discussion at SF patch #712900. | 
 | 347 |  */ | 
 | 348 | #define LASTMARK_SAVE()     \ | 
 | 349 |     do { \ | 
 | 350 |         ctx->lastmark = state->lastmark; \ | 
 | 351 |         ctx->lastindex = state->lastindex; \ | 
 | 352 |     } while (0) | 
 | 353 | #define LASTMARK_RESTORE()  \ | 
 | 354 |     do { \ | 
 | 355 |         state->lastmark = ctx->lastmark; \ | 
 | 356 |         state->lastindex = ctx->lastindex; \ | 
 | 357 |     } while (0) | 
 | 358 |  | 
 | 359 | #define RETURN_ERROR(i) do { return i; } while(0) | 
 | 360 | #define RETURN_FAILURE do { ret = 0; goto exit; } while(0) | 
 | 361 | #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0) | 
 | 362 |  | 
 | 363 | #define RETURN_ON_ERROR(i) \ | 
 | 364 |     do { if (i < 0) RETURN_ERROR(i); } while (0) | 
 | 365 | #define RETURN_ON_SUCCESS(i) \ | 
 | 366 |     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0) | 
 | 367 | #define RETURN_ON_FAILURE(i) \ | 
 | 368 |     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0) | 
 | 369 |  | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 370 | #define DATA_STACK_ALLOC(state, type, ptr) \ | 
 | 371 | do { \ | 
 | 372 |     alloc_pos = state->data_stack_base; \ | 
 | 373 |     TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \ | 
 | 374 |            "(%" PY_FORMAT_SIZE_T "d)\n", \ | 
| Serhiy Storchaka | 12b2538 | 2015-11-05 17:43:42 +0200 | [diff] [blame] | 375 |            Py_STRINGIFY(type), alloc_pos, sizeof(type))); \ | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 376 |     if (sizeof(type) > state->data_stack_size - alloc_pos) { \ | 
 | 377 |         int j = data_stack_grow(state, sizeof(type)); \ | 
 | 378 |         if (j < 0) return j; \ | 
 | 379 |         if (ctx_pos != -1) \ | 
 | 380 |             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ | 
 | 381 |     } \ | 
 | 382 |     ptr = (type*)(state->data_stack+alloc_pos); \ | 
 | 383 |     state->data_stack_base += sizeof(type); \ | 
 | 384 | } while (0) | 
 | 385 |  | 
 | 386 | #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \ | 
 | 387 | do { \ | 
| Serhiy Storchaka | 12b2538 | 2015-11-05 17:43:42 +0200 | [diff] [blame] | 388 |     TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", Py_STRINGIFY(type), pos)); \ | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 389 |     ptr = (type*)(state->data_stack+pos); \ | 
 | 390 | } while (0) | 
 | 391 |  | 
 | 392 | #define DATA_STACK_PUSH(state, data, size) \ | 
 | 393 | do { \ | 
 | 394 |     TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \ | 
 | 395 |            "(%" PY_FORMAT_SIZE_T "d)\n", \ | 
 | 396 |            data, state->data_stack_base, size)); \ | 
 | 397 |     if (size > state->data_stack_size - state->data_stack_base) { \ | 
 | 398 |         int j = data_stack_grow(state, size); \ | 
 | 399 |         if (j < 0) return j; \ | 
 | 400 |         if (ctx_pos != -1) \ | 
 | 401 |             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ | 
 | 402 |     } \ | 
 | 403 |     memcpy(state->data_stack+state->data_stack_base, data, size); \ | 
 | 404 |     state->data_stack_base += size; \ | 
 | 405 | } while (0) | 
 | 406 |  | 
 | 407 | #define DATA_STACK_POP(state, data, size, discard) \ | 
 | 408 | do { \ | 
 | 409 |     TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \ | 
 | 410 |            "(%" PY_FORMAT_SIZE_T "d)\n", \ | 
 | 411 |            data, state->data_stack_base-size, size)); \ | 
 | 412 |     memcpy(data, state->data_stack+state->data_stack_base-size, size); \ | 
 | 413 |     if (discard) \ | 
 | 414 |         state->data_stack_base -= size; \ | 
 | 415 | } while (0) | 
 | 416 |  | 
 | 417 | #define DATA_STACK_POP_DISCARD(state, size) \ | 
 | 418 | do { \ | 
 | 419 |     TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \ | 
 | 420 |            "(%" PY_FORMAT_SIZE_T "d)\n", \ | 
 | 421 |            state->data_stack_base-size, size)); \ | 
 | 422 |     state->data_stack_base -= size; \ | 
 | 423 | } while(0) | 
 | 424 |  | 
 | 425 | #define DATA_PUSH(x) \ | 
 | 426 |     DATA_STACK_PUSH(state, (x), sizeof(*(x))) | 
 | 427 | #define DATA_POP(x) \ | 
 | 428 |     DATA_STACK_POP(state, (x), sizeof(*(x)), 1) | 
 | 429 | #define DATA_POP_DISCARD(x) \ | 
 | 430 |     DATA_STACK_POP_DISCARD(state, sizeof(*(x))) | 
 | 431 | #define DATA_ALLOC(t,p) \ | 
 | 432 |     DATA_STACK_ALLOC(state, t, p) | 
 | 433 | #define DATA_LOOKUP_AT(t,p,pos) \ | 
 | 434 |     DATA_STACK_LOOKUP_AT(state,t,p,pos) | 
 | 435 |  | 
 | 436 | #define MARK_PUSH(lastmark) \ | 
 | 437 |     do if (lastmark > 0) { \ | 
 | 438 |         i = lastmark; /* ctx->lastmark may change if reallocated */ \ | 
 | 439 |         DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \ | 
 | 440 |     } while (0) | 
 | 441 | #define MARK_POP(lastmark) \ | 
 | 442 |     do if (lastmark > 0) { \ | 
 | 443 |         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \ | 
 | 444 |     } while (0) | 
 | 445 | #define MARK_POP_KEEP(lastmark) \ | 
 | 446 |     do if (lastmark > 0) { \ | 
 | 447 |         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \ | 
 | 448 |     } while (0) | 
 | 449 | #define MARK_POP_DISCARD(lastmark) \ | 
 | 450 |     do if (lastmark > 0) { \ | 
 | 451 |         DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \ | 
 | 452 |     } while (0) | 
 | 453 |  | 
 | 454 | #define JUMP_NONE            0 | 
 | 455 | #define JUMP_MAX_UNTIL_1     1 | 
 | 456 | #define JUMP_MAX_UNTIL_2     2 | 
 | 457 | #define JUMP_MAX_UNTIL_3     3 | 
 | 458 | #define JUMP_MIN_UNTIL_1     4 | 
 | 459 | #define JUMP_MIN_UNTIL_2     5 | 
 | 460 | #define JUMP_MIN_UNTIL_3     6 | 
 | 461 | #define JUMP_REPEAT          7 | 
 | 462 | #define JUMP_REPEAT_ONE_1    8 | 
 | 463 | #define JUMP_REPEAT_ONE_2    9 | 
 | 464 | #define JUMP_MIN_REPEAT_ONE  10 | 
 | 465 | #define JUMP_BRANCH          11 | 
 | 466 | #define JUMP_ASSERT          12 | 
 | 467 | #define JUMP_ASSERT_NOT      13 | 
 | 468 |  | 
| Serhiy Storchaka | 32eddc1 | 2013-11-23 23:20:30 +0200 | [diff] [blame] | 469 | #define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) \ | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 470 |     DATA_ALLOC(SRE(match_context), nextctx); \ | 
 | 471 |     nextctx->last_ctx_pos = ctx_pos; \ | 
 | 472 |     nextctx->jump = jumpvalue; \ | 
 | 473 |     nextctx->pattern = nextpattern; \ | 
| Serhiy Storchaka | 32eddc1 | 2013-11-23 23:20:30 +0200 | [diff] [blame] | 474 |     nextctx->match_all = matchall; \ | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 475 |     ctx_pos = alloc_pos; \ | 
 | 476 |     ctx = nextctx; \ | 
 | 477 |     goto entrance; \ | 
 | 478 |     jumplabel: \ | 
 | 479 |     while (0) /* gcc doesn't like labels at end of scopes */ \ | 
 | 480 |  | 
| Serhiy Storchaka | 32eddc1 | 2013-11-23 23:20:30 +0200 | [diff] [blame] | 481 | #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ | 
 | 482 |     DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->match_all) | 
 | 483 |  | 
 | 484 | #define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \ | 
 | 485 |     DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0) | 
 | 486 |  | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 487 | typedef struct { | 
 | 488 |     Py_ssize_t last_ctx_pos; | 
 | 489 |     Py_ssize_t jump; | 
 | 490 |     SRE_CHAR* ptr; | 
 | 491 |     SRE_CODE* pattern; | 
 | 492 |     Py_ssize_t count; | 
 | 493 |     Py_ssize_t lastmark; | 
 | 494 |     Py_ssize_t lastindex; | 
 | 495 |     union { | 
 | 496 |         SRE_CODE chr; | 
 | 497 |         SRE_REPEAT* rep; | 
 | 498 |     } u; | 
| Serhiy Storchaka | 32eddc1 | 2013-11-23 23:20:30 +0200 | [diff] [blame] | 499 |     int match_all; | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 500 | } SRE(match_context); | 
 | 501 |  | 
 | 502 | /* check if string matches the given pattern.  returns <0 for | 
 | 503 |    error, 0 for failure, and 1 for success */ | 
 | 504 | LOCAL(Py_ssize_t) | 
| Serhiy Storchaka | 429b59e | 2014-05-14 21:48:17 +0300 | [diff] [blame] | 505 | SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all) | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 506 | { | 
 | 507 |     SRE_CHAR* end = (SRE_CHAR *)state->end; | 
 | 508 |     Py_ssize_t alloc_pos, ctx_pos = -1; | 
 | 509 |     Py_ssize_t i, ret = 0; | 
 | 510 |     Py_ssize_t jump; | 
 | 511 |     unsigned int sigcount=0; | 
 | 512 |  | 
 | 513 |     SRE(match_context)* ctx; | 
 | 514 |     SRE(match_context)* nextctx; | 
 | 515 |  | 
 | 516 |     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); | 
 | 517 |  | 
 | 518 |     DATA_ALLOC(SRE(match_context), ctx); | 
 | 519 |     ctx->last_ctx_pos = -1; | 
 | 520 |     ctx->jump = JUMP_NONE; | 
 | 521 |     ctx->pattern = pattern; | 
| Serhiy Storchaka | 429b59e | 2014-05-14 21:48:17 +0300 | [diff] [blame] | 522 |     ctx->match_all = match_all; | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 523 |     ctx_pos = alloc_pos; | 
 | 524 |  | 
 | 525 | entrance: | 
 | 526 |  | 
 | 527 |     ctx->ptr = (SRE_CHAR *)state->ptr; | 
 | 528 |  | 
 | 529 |     if (ctx->pattern[0] == SRE_OP_INFO) { | 
 | 530 |         /* optimization info block */ | 
 | 531 |         /* <INFO> <1=skip> <2=flags> <3=min> ... */ | 
 | 532 |         if (ctx->pattern[3] && (Py_uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) { | 
 | 533 |             TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, " | 
 | 534 |                    "need %" PY_FORMAT_SIZE_T "d)\n", | 
 | 535 |                    end - ctx->ptr, (Py_ssize_t) ctx->pattern[3])); | 
 | 536 |             RETURN_FAILURE; | 
 | 537 |         } | 
 | 538 |         ctx->pattern += ctx->pattern[1] + 1; | 
 | 539 |     } | 
 | 540 |  | 
 | 541 |     for (;;) { | 
 | 542 |         ++sigcount; | 
 | 543 |         if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) | 
 | 544 |             RETURN_ERROR(SRE_ERROR_INTERRUPTED); | 
 | 545 |  | 
 | 546 |         switch (*ctx->pattern++) { | 
 | 547 |  | 
 | 548 |         case SRE_OP_MARK: | 
 | 549 |             /* set mark */ | 
 | 550 |             /* <MARK> <gid> */ | 
 | 551 |             TRACE(("|%p|%p|MARK %d\n", ctx->pattern, | 
 | 552 |                    ctx->ptr, ctx->pattern[0])); | 
 | 553 |             i = ctx->pattern[0]; | 
 | 554 |             if (i & 1) | 
 | 555 |                 state->lastindex = i/2 + 1; | 
 | 556 |             if (i > state->lastmark) { | 
 | 557 |                 /* state->lastmark is the highest valid index in the | 
 | 558 |                    state->mark array.  If it is increased by more than 1, | 
 | 559 |                    the intervening marks must be set to NULL to signal | 
 | 560 |                    that these marks have not been encountered. */ | 
 | 561 |                 Py_ssize_t j = state->lastmark + 1; | 
 | 562 |                 while (j < i) | 
 | 563 |                     state->mark[j++] = NULL; | 
 | 564 |                 state->lastmark = i; | 
 | 565 |             } | 
 | 566 |             state->mark[i] = ctx->ptr; | 
 | 567 |             ctx->pattern++; | 
 | 568 |             break; | 
 | 569 |  | 
 | 570 |         case SRE_OP_LITERAL: | 
 | 571 |             /* match literal string */ | 
 | 572 |             /* <LITERAL> <code> */ | 
 | 573 |             TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, | 
 | 574 |                    ctx->ptr, *ctx->pattern)); | 
 | 575 |             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0]) | 
 | 576 |                 RETURN_FAILURE; | 
 | 577 |             ctx->pattern++; | 
 | 578 |             ctx->ptr++; | 
 | 579 |             break; | 
 | 580 |  | 
 | 581 |         case SRE_OP_NOT_LITERAL: | 
 | 582 |             /* match anything that is not literal character */ | 
 | 583 |             /* <NOT_LITERAL> <code> */ | 
 | 584 |             TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, | 
 | 585 |                    ctx->ptr, *ctx->pattern)); | 
 | 586 |             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0]) | 
 | 587 |                 RETURN_FAILURE; | 
 | 588 |             ctx->pattern++; | 
 | 589 |             ctx->ptr++; | 
 | 590 |             break; | 
 | 591 |  | 
 | 592 |         case SRE_OP_SUCCESS: | 
 | 593 |             /* end of pattern */ | 
 | 594 |             TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); | 
| Serhiy Storchaka | 32eddc1 | 2013-11-23 23:20:30 +0200 | [diff] [blame] | 595 |             if (!ctx->match_all || ctx->ptr == state->end) { | 
 | 596 |                 state->ptr = ctx->ptr; | 
 | 597 |                 RETURN_SUCCESS; | 
 | 598 |             } | 
 | 599 |             RETURN_FAILURE; | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 600 |  | 
 | 601 |         case SRE_OP_AT: | 
 | 602 |             /* match at given position */ | 
 | 603 |             /* <AT> <code> */ | 
 | 604 |             TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); | 
 | 605 |             if (!SRE(at)(state, ctx->ptr, *ctx->pattern)) | 
 | 606 |                 RETURN_FAILURE; | 
 | 607 |             ctx->pattern++; | 
 | 608 |             break; | 
 | 609 |  | 
 | 610 |         case SRE_OP_CATEGORY: | 
 | 611 |             /* match at given category */ | 
 | 612 |             /* <CATEGORY> <code> */ | 
 | 613 |             TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, | 
 | 614 |                    ctx->ptr, *ctx->pattern)); | 
 | 615 |             if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0])) | 
 | 616 |                 RETURN_FAILURE; | 
 | 617 |             ctx->pattern++; | 
 | 618 |             ctx->ptr++; | 
 | 619 |             break; | 
 | 620 |  | 
 | 621 |         case SRE_OP_ANY: | 
 | 622 |             /* match anything (except a newline) */ | 
 | 623 |             /* <ANY> */ | 
 | 624 |             TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); | 
 | 625 |             if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) | 
 | 626 |                 RETURN_FAILURE; | 
 | 627 |             ctx->ptr++; | 
 | 628 |             break; | 
 | 629 |  | 
 | 630 |         case SRE_OP_ANY_ALL: | 
 | 631 |             /* match anything */ | 
 | 632 |             /* <ANY_ALL> */ | 
 | 633 |             TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); | 
 | 634 |             if (ctx->ptr >= end) | 
 | 635 |                 RETURN_FAILURE; | 
 | 636 |             ctx->ptr++; | 
 | 637 |             break; | 
 | 638 |  | 
 | 639 |         case SRE_OP_IN: | 
 | 640 |             /* match set member (or non_member) */ | 
 | 641 |             /* <IN> <skip> <set> */ | 
 | 642 |             TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); | 
| Serhiy Storchaka | 4b8f894 | 2014-10-31 12:36:56 +0200 | [diff] [blame] | 643 |             if (ctx->ptr >= end || | 
 | 644 |                 !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr)) | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 645 |                 RETURN_FAILURE; | 
 | 646 |             ctx->pattern += ctx->pattern[0]; | 
 | 647 |             ctx->ptr++; | 
 | 648 |             break; | 
 | 649 |  | 
 | 650 |         case SRE_OP_LITERAL_IGNORE: | 
 | 651 |             TRACE(("|%p|%p|LITERAL_IGNORE %d\n", | 
 | 652 |                    ctx->pattern, ctx->ptr, ctx->pattern[0])); | 
 | 653 |             if (ctx->ptr >= end || | 
 | 654 |                 state->lower(*ctx->ptr) != state->lower(*ctx->pattern)) | 
 | 655 |                 RETURN_FAILURE; | 
 | 656 |             ctx->pattern++; | 
 | 657 |             ctx->ptr++; | 
 | 658 |             break; | 
 | 659 |  | 
 | 660 |         case SRE_OP_NOT_LITERAL_IGNORE: | 
 | 661 |             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", | 
 | 662 |                    ctx->pattern, ctx->ptr, *ctx->pattern)); | 
 | 663 |             if (ctx->ptr >= end || | 
 | 664 |                 state->lower(*ctx->ptr) == state->lower(*ctx->pattern)) | 
 | 665 |                 RETURN_FAILURE; | 
 | 666 |             ctx->pattern++; | 
 | 667 |             ctx->ptr++; | 
 | 668 |             break; | 
 | 669 |  | 
 | 670 |         case SRE_OP_IN_IGNORE: | 
 | 671 |             TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); | 
 | 672 |             if (ctx->ptr >= end | 
| Serhiy Storchaka | 4b8f894 | 2014-10-31 12:36:56 +0200 | [diff] [blame] | 673 |                 || !SRE(charset)(state, ctx->pattern+1, | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 674 |                                  (SRE_CODE)state->lower(*ctx->ptr))) | 
 | 675 |                 RETURN_FAILURE; | 
 | 676 |             ctx->pattern += ctx->pattern[0]; | 
 | 677 |             ctx->ptr++; | 
 | 678 |             break; | 
 | 679 |  | 
 | 680 |         case SRE_OP_JUMP: | 
 | 681 |         case SRE_OP_INFO: | 
 | 682 |             /* jump forward */ | 
 | 683 |             /* <JUMP> <offset> */ | 
 | 684 |             TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, | 
 | 685 |                    ctx->ptr, ctx->pattern[0])); | 
 | 686 |             ctx->pattern += ctx->pattern[0]; | 
 | 687 |             break; | 
 | 688 |  | 
 | 689 |         case SRE_OP_BRANCH: | 
 | 690 |             /* alternation */ | 
 | 691 |             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ | 
 | 692 |             TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); | 
 | 693 |             LASTMARK_SAVE(); | 
 | 694 |             ctx->u.rep = state->repeat; | 
 | 695 |             if (ctx->u.rep) | 
 | 696 |                 MARK_PUSH(ctx->lastmark); | 
 | 697 |             for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) { | 
 | 698 |                 if (ctx->pattern[1] == SRE_OP_LITERAL && | 
 | 699 |                     (ctx->ptr >= end || | 
 | 700 |                      (SRE_CODE) *ctx->ptr != ctx->pattern[2])) | 
 | 701 |                     continue; | 
 | 702 |                 if (ctx->pattern[1] == SRE_OP_IN && | 
 | 703 |                     (ctx->ptr >= end || | 
| Serhiy Storchaka | 4b8f894 | 2014-10-31 12:36:56 +0200 | [diff] [blame] | 704 |                      !SRE(charset)(state, ctx->pattern + 3, | 
 | 705 |                                    (SRE_CODE) *ctx->ptr))) | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 706 |                     continue; | 
 | 707 |                 state->ptr = ctx->ptr; | 
 | 708 |                 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); | 
 | 709 |                 if (ret) { | 
 | 710 |                     if (ctx->u.rep) | 
 | 711 |                         MARK_POP_DISCARD(ctx->lastmark); | 
 | 712 |                     RETURN_ON_ERROR(ret); | 
 | 713 |                     RETURN_SUCCESS; | 
 | 714 |                 } | 
 | 715 |                 if (ctx->u.rep) | 
 | 716 |                     MARK_POP_KEEP(ctx->lastmark); | 
 | 717 |                 LASTMARK_RESTORE(); | 
 | 718 |             } | 
 | 719 |             if (ctx->u.rep) | 
 | 720 |                 MARK_POP_DISCARD(ctx->lastmark); | 
 | 721 |             RETURN_FAILURE; | 
 | 722 |  | 
 | 723 |         case SRE_OP_REPEAT_ONE: | 
 | 724 |             /* match repeated sequence (maximizing regexp) */ | 
 | 725 |  | 
 | 726 |             /* this operator only works if the repeated item is | 
 | 727 |                exactly one character wide, and we're not already | 
 | 728 |                collecting backtracking points.  for other cases, | 
 | 729 |                use the MAX_REPEAT operator */ | 
 | 730 |  | 
 | 731 |             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 
 | 732 |  | 
 | 733 |             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, | 
 | 734 |                    ctx->pattern[1], ctx->pattern[2])); | 
 | 735 |  | 
 | 736 |             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) | 
 | 737 |                 RETURN_FAILURE; /* cannot match */ | 
 | 738 |  | 
 | 739 |             state->ptr = ctx->ptr; | 
 | 740 |  | 
 | 741 |             ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]); | 
 | 742 |             RETURN_ON_ERROR(ret); | 
 | 743 |             DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 
 | 744 |             ctx->count = ret; | 
 | 745 |             ctx->ptr += ctx->count; | 
 | 746 |  | 
 | 747 |             /* when we arrive here, count contains the number of | 
 | 748 |                matches, and ctx->ptr points to the tail of the target | 
 | 749 |                string.  check if the rest of the pattern matches, | 
 | 750 |                and backtrack if not. */ | 
 | 751 |  | 
 | 752 |             if (ctx->count < (Py_ssize_t) ctx->pattern[1]) | 
 | 753 |                 RETURN_FAILURE; | 
 | 754 |  | 
| Serhiy Storchaka | 32eddc1 | 2013-11-23 23:20:30 +0200 | [diff] [blame] | 755 |             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && | 
| Serhiy Storchaka | 429b59e | 2014-05-14 21:48:17 +0300 | [diff] [blame] | 756 |                 ctx->ptr == state->end) { | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 757 |                 /* tail is empty.  we're finished */ | 
 | 758 |                 state->ptr = ctx->ptr; | 
 | 759 |                 RETURN_SUCCESS; | 
 | 760 |             } | 
 | 761 |  | 
 | 762 |             LASTMARK_SAVE(); | 
 | 763 |  | 
 | 764 |             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) { | 
 | 765 |                 /* tail starts with a literal. skip positions where | 
 | 766 |                    the rest of the pattern cannot possibly match */ | 
 | 767 |                 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1]; | 
 | 768 |                 for (;;) { | 
 | 769 |                     while (ctx->count >= (Py_ssize_t) ctx->pattern[1] && | 
 | 770 |                            (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) { | 
 | 771 |                         ctx->ptr--; | 
 | 772 |                         ctx->count--; | 
 | 773 |                     } | 
 | 774 |                     if (ctx->count < (Py_ssize_t) ctx->pattern[1]) | 
 | 775 |                         break; | 
 | 776 |                     state->ptr = ctx->ptr; | 
 | 777 |                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, | 
 | 778 |                             ctx->pattern+ctx->pattern[0]); | 
 | 779 |                     if (ret) { | 
 | 780 |                         RETURN_ON_ERROR(ret); | 
 | 781 |                         RETURN_SUCCESS; | 
 | 782 |                     } | 
 | 783 |  | 
 | 784 |                     LASTMARK_RESTORE(); | 
 | 785 |  | 
 | 786 |                     ctx->ptr--; | 
 | 787 |                     ctx->count--; | 
 | 788 |                 } | 
 | 789 |  | 
 | 790 |             } else { | 
 | 791 |                 /* general case */ | 
 | 792 |                 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { | 
 | 793 |                     state->ptr = ctx->ptr; | 
 | 794 |                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, | 
 | 795 |                             ctx->pattern+ctx->pattern[0]); | 
 | 796 |                     if (ret) { | 
 | 797 |                         RETURN_ON_ERROR(ret); | 
 | 798 |                         RETURN_SUCCESS; | 
 | 799 |                     } | 
 | 800 |                     ctx->ptr--; | 
 | 801 |                     ctx->count--; | 
 | 802 |                     LASTMARK_RESTORE(); | 
 | 803 |                 } | 
 | 804 |             } | 
 | 805 |             RETURN_FAILURE; | 
 | 806 |  | 
 | 807 |         case SRE_OP_MIN_REPEAT_ONE: | 
 | 808 |             /* match repeated sequence (minimizing regexp) */ | 
 | 809 |  | 
 | 810 |             /* this operator only works if the repeated item is | 
 | 811 |                exactly one character wide, and we're not already | 
 | 812 |                collecting backtracking points.  for other cases, | 
 | 813 |                use the MIN_REPEAT operator */ | 
 | 814 |  | 
 | 815 |             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 
 | 816 |  | 
 | 817 |             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, | 
 | 818 |                    ctx->pattern[1], ctx->pattern[2])); | 
 | 819 |  | 
 | 820 |             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) | 
 | 821 |                 RETURN_FAILURE; /* cannot match */ | 
 | 822 |  | 
 | 823 |             state->ptr = ctx->ptr; | 
 | 824 |  | 
 | 825 |             if (ctx->pattern[1] == 0) | 
 | 826 |                 ctx->count = 0; | 
 | 827 |             else { | 
 | 828 |                 /* count using pattern min as the maximum */ | 
 | 829 |                 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]); | 
 | 830 |                 RETURN_ON_ERROR(ret); | 
 | 831 |                 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 
 | 832 |                 if (ret < (Py_ssize_t) ctx->pattern[1]) | 
 | 833 |                     /* didn't match minimum number of times */ | 
 | 834 |                     RETURN_FAILURE; | 
 | 835 |                 /* advance past minimum matches of repeat */ | 
 | 836 |                 ctx->count = ret; | 
 | 837 |                 ctx->ptr += ctx->count; | 
 | 838 |             } | 
 | 839 |  | 
| Serhiy Storchaka | 32eddc1 | 2013-11-23 23:20:30 +0200 | [diff] [blame] | 840 |             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && | 
| Serhiy Storchaka | 429b59e | 2014-05-14 21:48:17 +0300 | [diff] [blame] | 841 |                 (!match_all || ctx->ptr == state->end)) { | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 842 |                 /* tail is empty.  we're finished */ | 
 | 843 |                 state->ptr = ctx->ptr; | 
 | 844 |                 RETURN_SUCCESS; | 
 | 845 |  | 
 | 846 |             } else { | 
 | 847 |                 /* general case */ | 
 | 848 |                 LASTMARK_SAVE(); | 
 | 849 |                 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT | 
 | 850 |                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { | 
 | 851 |                     state->ptr = ctx->ptr; | 
 | 852 |                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, | 
 | 853 |                             ctx->pattern+ctx->pattern[0]); | 
 | 854 |                     if (ret) { | 
 | 855 |                         RETURN_ON_ERROR(ret); | 
 | 856 |                         RETURN_SUCCESS; | 
 | 857 |                     } | 
 | 858 |                     state->ptr = ctx->ptr; | 
 | 859 |                     ret = SRE(count)(state, ctx->pattern+3, 1); | 
 | 860 |                     RETURN_ON_ERROR(ret); | 
 | 861 |                     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 
 | 862 |                     if (ret == 0) | 
 | 863 |                         break; | 
 | 864 |                     assert(ret == 1); | 
 | 865 |                     ctx->ptr++; | 
 | 866 |                     ctx->count++; | 
 | 867 |                     LASTMARK_RESTORE(); | 
 | 868 |                 } | 
 | 869 |             } | 
 | 870 |             RETURN_FAILURE; | 
 | 871 |  | 
 | 872 |         case SRE_OP_REPEAT: | 
 | 873 |             /* create repeat context.  all the hard work is done | 
 | 874 |                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ | 
 | 875 |             /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */ | 
 | 876 |             TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr, | 
 | 877 |                    ctx->pattern[1], ctx->pattern[2])); | 
 | 878 |  | 
 | 879 |             /* install new repeat context */ | 
 | 880 |             ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep)); | 
 | 881 |             if (!ctx->u.rep) { | 
 | 882 |                 PyErr_NoMemory(); | 
 | 883 |                 RETURN_FAILURE; | 
 | 884 |             } | 
 | 885 |             ctx->u.rep->count = -1; | 
 | 886 |             ctx->u.rep->pattern = ctx->pattern; | 
 | 887 |             ctx->u.rep->prev = state->repeat; | 
 | 888 |             ctx->u.rep->last_ptr = NULL; | 
 | 889 |             state->repeat = ctx->u.rep; | 
 | 890 |  | 
 | 891 |             state->ptr = ctx->ptr; | 
 | 892 |             DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); | 
 | 893 |             state->repeat = ctx->u.rep->prev; | 
 | 894 |             PyObject_FREE(ctx->u.rep); | 
 | 895 |  | 
 | 896 |             if (ret) { | 
 | 897 |                 RETURN_ON_ERROR(ret); | 
 | 898 |                 RETURN_SUCCESS; | 
 | 899 |             } | 
 | 900 |             RETURN_FAILURE; | 
 | 901 |  | 
 | 902 |         case SRE_OP_MAX_UNTIL: | 
 | 903 |             /* maximizing repeat */ | 
 | 904 |             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ | 
 | 905 |  | 
 | 906 |             /* FIXME: we probably need to deal with zero-width | 
 | 907 |                matches in here... */ | 
 | 908 |  | 
 | 909 |             ctx->u.rep = state->repeat; | 
 | 910 |             if (!ctx->u.rep) | 
 | 911 |                 RETURN_ERROR(SRE_ERROR_STATE); | 
 | 912 |  | 
 | 913 |             state->ptr = ctx->ptr; | 
 | 914 |  | 
 | 915 |             ctx->count = ctx->u.rep->count+1; | 
 | 916 |  | 
 | 917 |             TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, | 
 | 918 |                    ctx->ptr, ctx->count)); | 
 | 919 |  | 
 | 920 |             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 
 | 921 |                 /* not enough matches */ | 
 | 922 |                 ctx->u.rep->count = ctx->count; | 
 | 923 |                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, | 
 | 924 |                         ctx->u.rep->pattern+3); | 
 | 925 |                 if (ret) { | 
 | 926 |                     RETURN_ON_ERROR(ret); | 
 | 927 |                     RETURN_SUCCESS; | 
 | 928 |                 } | 
 | 929 |                 ctx->u.rep->count = ctx->count-1; | 
 | 930 |                 state->ptr = ctx->ptr; | 
 | 931 |                 RETURN_FAILURE; | 
 | 932 |             } | 
 | 933 |  | 
 | 934 |             if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || | 
 | 935 |                 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && | 
 | 936 |                 state->ptr != ctx->u.rep->last_ptr) { | 
 | 937 |                 /* we may have enough matches, but if we can | 
 | 938 |                    match another item, do so */ | 
 | 939 |                 ctx->u.rep->count = ctx->count; | 
 | 940 |                 LASTMARK_SAVE(); | 
 | 941 |                 MARK_PUSH(ctx->lastmark); | 
 | 942 |                 /* zero-width match protection */ | 
 | 943 |                 DATA_PUSH(&ctx->u.rep->last_ptr); | 
 | 944 |                 ctx->u.rep->last_ptr = state->ptr; | 
 | 945 |                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, | 
 | 946 |                         ctx->u.rep->pattern+3); | 
 | 947 |                 DATA_POP(&ctx->u.rep->last_ptr); | 
 | 948 |                 if (ret) { | 
 | 949 |                     MARK_POP_DISCARD(ctx->lastmark); | 
 | 950 |                     RETURN_ON_ERROR(ret); | 
 | 951 |                     RETURN_SUCCESS; | 
 | 952 |                 } | 
 | 953 |                 MARK_POP(ctx->lastmark); | 
 | 954 |                 LASTMARK_RESTORE(); | 
 | 955 |                 ctx->u.rep->count = ctx->count-1; | 
 | 956 |                 state->ptr = ctx->ptr; | 
 | 957 |             } | 
 | 958 |  | 
 | 959 |             /* cannot match more repeated items here.  make sure the | 
 | 960 |                tail matches */ | 
 | 961 |             state->repeat = ctx->u.rep->prev; | 
 | 962 |             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); | 
 | 963 |             RETURN_ON_SUCCESS(ret); | 
 | 964 |             state->repeat = ctx->u.rep; | 
 | 965 |             state->ptr = ctx->ptr; | 
 | 966 |             RETURN_FAILURE; | 
 | 967 |  | 
 | 968 |         case SRE_OP_MIN_UNTIL: | 
 | 969 |             /* minimizing repeat */ | 
 | 970 |             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ | 
 | 971 |  | 
 | 972 |             ctx->u.rep = state->repeat; | 
 | 973 |             if (!ctx->u.rep) | 
 | 974 |                 RETURN_ERROR(SRE_ERROR_STATE); | 
 | 975 |  | 
 | 976 |             state->ptr = ctx->ptr; | 
 | 977 |  | 
 | 978 |             ctx->count = ctx->u.rep->count+1; | 
 | 979 |  | 
 | 980 |             TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern, | 
 | 981 |                    ctx->ptr, ctx->count, ctx->u.rep->pattern)); | 
 | 982 |  | 
 | 983 |             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 
 | 984 |                 /* not enough matches */ | 
 | 985 |                 ctx->u.rep->count = ctx->count; | 
 | 986 |                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, | 
 | 987 |                         ctx->u.rep->pattern+3); | 
 | 988 |                 if (ret) { | 
 | 989 |                     RETURN_ON_ERROR(ret); | 
 | 990 |                     RETURN_SUCCESS; | 
 | 991 |                 } | 
 | 992 |                 ctx->u.rep->count = ctx->count-1; | 
 | 993 |                 state->ptr = ctx->ptr; | 
 | 994 |                 RETURN_FAILURE; | 
 | 995 |             } | 
 | 996 |  | 
 | 997 |             LASTMARK_SAVE(); | 
 | 998 |  | 
 | 999 |             /* see if the tail matches */ | 
 | 1000 |             state->repeat = ctx->u.rep->prev; | 
 | 1001 |             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); | 
 | 1002 |             if (ret) { | 
 | 1003 |                 RETURN_ON_ERROR(ret); | 
 | 1004 |                 RETURN_SUCCESS; | 
 | 1005 |             } | 
 | 1006 |  | 
 | 1007 |             state->repeat = ctx->u.rep; | 
 | 1008 |             state->ptr = ctx->ptr; | 
 | 1009 |  | 
 | 1010 |             LASTMARK_RESTORE(); | 
 | 1011 |  | 
 | 1012 |             if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] | 
 | 1013 |                 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || | 
 | 1014 |                 state->ptr == ctx->u.rep->last_ptr) | 
 | 1015 |                 RETURN_FAILURE; | 
 | 1016 |  | 
 | 1017 |             ctx->u.rep->count = ctx->count; | 
 | 1018 |             /* zero-width match protection */ | 
 | 1019 |             DATA_PUSH(&ctx->u.rep->last_ptr); | 
 | 1020 |             ctx->u.rep->last_ptr = state->ptr; | 
 | 1021 |             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, | 
 | 1022 |                     ctx->u.rep->pattern+3); | 
 | 1023 |             DATA_POP(&ctx->u.rep->last_ptr); | 
 | 1024 |             if (ret) { | 
 | 1025 |                 RETURN_ON_ERROR(ret); | 
 | 1026 |                 RETURN_SUCCESS; | 
 | 1027 |             } | 
 | 1028 |             ctx->u.rep->count = ctx->count-1; | 
 | 1029 |             state->ptr = ctx->ptr; | 
 | 1030 |             RETURN_FAILURE; | 
 | 1031 |  | 
 | 1032 |         case SRE_OP_GROUPREF: | 
 | 1033 |             /* match backreference */ | 
 | 1034 |             TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, | 
 | 1035 |                    ctx->ptr, ctx->pattern[0])); | 
 | 1036 |             i = ctx->pattern[0]; | 
 | 1037 |             { | 
 | 1038 |                 Py_ssize_t groupref = i+i; | 
 | 1039 |                 if (groupref >= state->lastmark) { | 
 | 1040 |                     RETURN_FAILURE; | 
 | 1041 |                 } else { | 
 | 1042 |                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 
 | 1043 |                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 
 | 1044 |                     if (!p || !e || e < p) | 
 | 1045 |                         RETURN_FAILURE; | 
 | 1046 |                     while (p < e) { | 
 | 1047 |                         if (ctx->ptr >= end || *ctx->ptr != *p) | 
 | 1048 |                             RETURN_FAILURE; | 
 | 1049 |                         p++; | 
 | 1050 |                         ctx->ptr++; | 
 | 1051 |                     } | 
 | 1052 |                 } | 
 | 1053 |             } | 
 | 1054 |             ctx->pattern++; | 
 | 1055 |             break; | 
 | 1056 |  | 
 | 1057 |         case SRE_OP_GROUPREF_IGNORE: | 
 | 1058 |             /* match backreference */ | 
 | 1059 |             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, | 
 | 1060 |                    ctx->ptr, ctx->pattern[0])); | 
 | 1061 |             i = ctx->pattern[0]; | 
 | 1062 |             { | 
 | 1063 |                 Py_ssize_t groupref = i+i; | 
 | 1064 |                 if (groupref >= state->lastmark) { | 
 | 1065 |                     RETURN_FAILURE; | 
 | 1066 |                 } else { | 
 | 1067 |                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 
 | 1068 |                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 
 | 1069 |                     if (!p || !e || e < p) | 
 | 1070 |                         RETURN_FAILURE; | 
 | 1071 |                     while (p < e) { | 
 | 1072 |                         if (ctx->ptr >= end || | 
 | 1073 |                             state->lower(*ctx->ptr) != state->lower(*p)) | 
 | 1074 |                             RETURN_FAILURE; | 
 | 1075 |                         p++; | 
 | 1076 |                         ctx->ptr++; | 
 | 1077 |                     } | 
 | 1078 |                 } | 
 | 1079 |             } | 
 | 1080 |             ctx->pattern++; | 
 | 1081 |             break; | 
 | 1082 |  | 
 | 1083 |         case SRE_OP_GROUPREF_EXISTS: | 
 | 1084 |             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, | 
 | 1085 |                    ctx->ptr, ctx->pattern[0])); | 
 | 1086 |             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */ | 
 | 1087 |             i = ctx->pattern[0]; | 
 | 1088 |             { | 
 | 1089 |                 Py_ssize_t groupref = i+i; | 
 | 1090 |                 if (groupref >= state->lastmark) { | 
 | 1091 |                     ctx->pattern += ctx->pattern[1]; | 
 | 1092 |                     break; | 
 | 1093 |                 } else { | 
 | 1094 |                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 
 | 1095 |                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 
 | 1096 |                     if (!p || !e || e < p) { | 
 | 1097 |                         ctx->pattern += ctx->pattern[1]; | 
 | 1098 |                         break; | 
 | 1099 |                     } | 
 | 1100 |                 } | 
 | 1101 |             } | 
 | 1102 |             ctx->pattern += 2; | 
 | 1103 |             break; | 
 | 1104 |  | 
 | 1105 |         case SRE_OP_ASSERT: | 
 | 1106 |             /* assert subpattern */ | 
 | 1107 |             /* <ASSERT> <skip> <back> <pattern> */ | 
 | 1108 |             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, | 
 | 1109 |                    ctx->ptr, ctx->pattern[1])); | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 1110 |             if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1]) | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1111 |                 RETURN_FAILURE; | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 1112 |             state->ptr = ctx->ptr - ctx->pattern[1]; | 
| Serhiy Storchaka | 32eddc1 | 2013-11-23 23:20:30 +0200 | [diff] [blame] | 1113 |             DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2); | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1114 |             RETURN_ON_FAILURE(ret); | 
 | 1115 |             ctx->pattern += ctx->pattern[0]; | 
 | 1116 |             break; | 
 | 1117 |  | 
 | 1118 |         case SRE_OP_ASSERT_NOT: | 
 | 1119 |             /* assert not subpattern */ | 
 | 1120 |             /* <ASSERT_NOT> <skip> <back> <pattern> */ | 
 | 1121 |             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, | 
 | 1122 |                    ctx->ptr, ctx->pattern[1])); | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 1123 |             if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) { | 
 | 1124 |                 state->ptr = ctx->ptr - ctx->pattern[1]; | 
| Serhiy Storchaka | 32eddc1 | 2013-11-23 23:20:30 +0200 | [diff] [blame] | 1125 |                 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1126 |                 if (ret) { | 
 | 1127 |                     RETURN_ON_ERROR(ret); | 
 | 1128 |                     RETURN_FAILURE; | 
 | 1129 |                 } | 
 | 1130 |             } | 
 | 1131 |             ctx->pattern += ctx->pattern[0]; | 
 | 1132 |             break; | 
 | 1133 |  | 
 | 1134 |         case SRE_OP_FAILURE: | 
 | 1135 |             /* immediate failure */ | 
 | 1136 |             TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); | 
 | 1137 |             RETURN_FAILURE; | 
 | 1138 |  | 
 | 1139 |         default: | 
 | 1140 |             TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, | 
 | 1141 |                    ctx->pattern[-1])); | 
 | 1142 |             RETURN_ERROR(SRE_ERROR_ILLEGAL); | 
 | 1143 |         } | 
 | 1144 |     } | 
 | 1145 |  | 
 | 1146 | exit: | 
 | 1147 |     ctx_pos = ctx->last_ctx_pos; | 
 | 1148 |     jump = ctx->jump; | 
 | 1149 |     DATA_POP_DISCARD(ctx); | 
 | 1150 |     if (ctx_pos == -1) | 
 | 1151 |         return ret; | 
 | 1152 |     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 
 | 1153 |  | 
 | 1154 |     switch (jump) { | 
 | 1155 |         case JUMP_MAX_UNTIL_2: | 
 | 1156 |             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr)); | 
 | 1157 |             goto jump_max_until_2; | 
 | 1158 |         case JUMP_MAX_UNTIL_3: | 
 | 1159 |             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr)); | 
 | 1160 |             goto jump_max_until_3; | 
 | 1161 |         case JUMP_MIN_UNTIL_2: | 
 | 1162 |             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr)); | 
 | 1163 |             goto jump_min_until_2; | 
 | 1164 |         case JUMP_MIN_UNTIL_3: | 
 | 1165 |             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr)); | 
 | 1166 |             goto jump_min_until_3; | 
 | 1167 |         case JUMP_BRANCH: | 
 | 1168 |             TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr)); | 
 | 1169 |             goto jump_branch; | 
 | 1170 |         case JUMP_MAX_UNTIL_1: | 
 | 1171 |             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr)); | 
 | 1172 |             goto jump_max_until_1; | 
 | 1173 |         case JUMP_MIN_UNTIL_1: | 
 | 1174 |             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr)); | 
 | 1175 |             goto jump_min_until_1; | 
 | 1176 |         case JUMP_REPEAT: | 
 | 1177 |             TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr)); | 
 | 1178 |             goto jump_repeat; | 
 | 1179 |         case JUMP_REPEAT_ONE_1: | 
 | 1180 |             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr)); | 
 | 1181 |             goto jump_repeat_one_1; | 
 | 1182 |         case JUMP_REPEAT_ONE_2: | 
 | 1183 |             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr)); | 
 | 1184 |             goto jump_repeat_one_2; | 
 | 1185 |         case JUMP_MIN_REPEAT_ONE: | 
 | 1186 |             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr)); | 
 | 1187 |             goto jump_min_repeat_one; | 
 | 1188 |         case JUMP_ASSERT: | 
 | 1189 |             TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr)); | 
 | 1190 |             goto jump_assert; | 
 | 1191 |         case JUMP_ASSERT_NOT: | 
 | 1192 |             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr)); | 
 | 1193 |             goto jump_assert_not; | 
 | 1194 |         case JUMP_NONE: | 
 | 1195 |             TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, | 
 | 1196 |                    ctx->ptr, ret)); | 
 | 1197 |             break; | 
 | 1198 |     } | 
 | 1199 |  | 
 | 1200 |     return ret; /* should never get here */ | 
 | 1201 | } | 
 | 1202 |  | 
 | 1203 | LOCAL(Py_ssize_t) | 
 | 1204 | SRE(search)(SRE_STATE* state, SRE_CODE* pattern) | 
 | 1205 | { | 
 | 1206 |     SRE_CHAR* ptr = (SRE_CHAR *)state->start; | 
 | 1207 |     SRE_CHAR* end = (SRE_CHAR *)state->end; | 
 | 1208 |     Py_ssize_t status = 0; | 
 | 1209 |     Py_ssize_t prefix_len = 0; | 
 | 1210 |     Py_ssize_t prefix_skip = 0; | 
 | 1211 |     SRE_CODE* prefix = NULL; | 
 | 1212 |     SRE_CODE* charset = NULL; | 
 | 1213 |     SRE_CODE* overlap = NULL; | 
 | 1214 |     int flags = 0; | 
 | 1215 |  | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 1216 |     if (ptr > end) | 
 | 1217 |         return 0; | 
 | 1218 |  | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1219 |     if (pattern[0] == SRE_OP_INFO) { | 
 | 1220 |         /* optimization info block */ | 
 | 1221 |         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */ | 
 | 1222 |  | 
 | 1223 |         flags = pattern[2]; | 
 | 1224 |  | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 1225 |         if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) { | 
 | 1226 |             TRACE(("reject (got %u chars, need %u)\n", | 
 | 1227 |                    (unsigned int)(end - ptr), pattern[3])); | 
 | 1228 |             return 0; | 
 | 1229 |         } | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1230 |         if (pattern[3] > 1) { | 
 | 1231 |             /* adjust end point (but make sure we leave at least one | 
 | 1232 |                character in there, so literal search will work) */ | 
 | 1233 |             end -= pattern[3] - 1; | 
 | 1234 |             if (end <= ptr) | 
 | 1235 |                 end = ptr; | 
 | 1236 |         } | 
 | 1237 |  | 
 | 1238 |         if (flags & SRE_INFO_PREFIX) { | 
 | 1239 |             /* pattern starts with a known prefix */ | 
 | 1240 |             /* <length> <skip> <prefix data> <overlap data> */ | 
 | 1241 |             prefix_len = pattern[5]; | 
 | 1242 |             prefix_skip = pattern[6]; | 
 | 1243 |             prefix = pattern + 7; | 
 | 1244 |             overlap = prefix + prefix_len - 1; | 
 | 1245 |         } else if (flags & SRE_INFO_CHARSET) | 
 | 1246 |             /* pattern starts with a character from a known set */ | 
 | 1247 |             /* <charset> */ | 
 | 1248 |             charset = pattern + 5; | 
 | 1249 |  | 
 | 1250 |         pattern += 1 + pattern[1]; | 
 | 1251 |     } | 
 | 1252 |  | 
 | 1253 |     TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n", | 
 | 1254 |            prefix, prefix_len, prefix_skip)); | 
 | 1255 |     TRACE(("charset = %p\n", charset)); | 
 | 1256 |  | 
| Serhiy Storchaka | 66dc464 | 2015-06-21 14:06:55 +0300 | [diff] [blame] | 1257 |     if (prefix_len == 1) { | 
 | 1258 |         /* pattern starts with a literal character */ | 
 | 1259 |         SRE_CHAR c = (SRE_CHAR) prefix[0]; | 
 | 1260 | #if SIZEOF_SRE_CHAR < 4 | 
 | 1261 |         if ((SRE_CODE) c != prefix[0]) | 
 | 1262 |             return 0; /* literal can't match: doesn't fit in char width */ | 
 | 1263 | #endif | 
 | 1264 |         end = (SRE_CHAR *)state->end; | 
 | 1265 |         while (ptr < end) { | 
 | 1266 |             while (*ptr != c) { | 
 | 1267 |                 if (++ptr >= end) | 
 | 1268 |                     return 0; | 
 | 1269 |             } | 
 | 1270 |             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); | 
 | 1271 |             state->start = ptr; | 
 | 1272 |             state->ptr = ptr + prefix_skip; | 
 | 1273 |             if (flags & SRE_INFO_LITERAL) | 
 | 1274 |                 return 1; /* we got all of it */ | 
 | 1275 |             status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 
 | 1276 |             if (status != 0) | 
 | 1277 |                 return status; | 
 | 1278 |             ++ptr; | 
 | 1279 |         } | 
 | 1280 |         return 0; | 
 | 1281 |     } | 
 | 1282 |  | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1283 |     if (prefix_len > 1) { | 
 | 1284 |         /* pattern starts with a known prefix.  use the overlap | 
 | 1285 |            table to skip forward as fast as we possibly can */ | 
 | 1286 |         Py_ssize_t i = 0; | 
 | 1287 |  | 
 | 1288 |         end = (SRE_CHAR *)state->end; | 
 | 1289 |         if (prefix_len > end - ptr) | 
 | 1290 |             return 0; | 
 | 1291 | #if SIZEOF_SRE_CHAR < 4 | 
 | 1292 |         for (i = 0; i < prefix_len; i++) | 
 | 1293 |             if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) | 
 | 1294 |                 return 0; /* literal can't match: doesn't fit in char width */ | 
 | 1295 | #endif | 
 | 1296 |         while (ptr < end) { | 
 | 1297 |             SRE_CHAR c = (SRE_CHAR) prefix[0]; | 
 | 1298 |             while (*ptr++ != c) { | 
 | 1299 |                 if (ptr >= end) | 
 | 1300 |                     return 0; | 
 | 1301 |             } | 
 | 1302 |             if (ptr >= end) | 
 | 1303 |                 return 0; | 
 | 1304 |  | 
 | 1305 |             i = 1; | 
 | 1306 |             do { | 
 | 1307 |                 if (*ptr == (SRE_CHAR) prefix[i]) { | 
 | 1308 |                     if (++i != prefix_len) { | 
 | 1309 |                         if (++ptr >= end) | 
 | 1310 |                             return 0; | 
 | 1311 |                         continue; | 
 | 1312 |                     } | 
 | 1313 |                     /* found a potential match */ | 
 | 1314 |                     TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); | 
 | 1315 |                     state->start = ptr - (prefix_len - 1); | 
 | 1316 |                     state->ptr = ptr - (prefix_len - prefix_skip - 1); | 
 | 1317 |                     if (flags & SRE_INFO_LITERAL) | 
 | 1318 |                         return 1; /* we got all of it */ | 
| Serhiy Storchaka | 429b59e | 2014-05-14 21:48:17 +0300 | [diff] [blame] | 1319 |                     status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1320 |                     if (status != 0) | 
 | 1321 |                         return status; | 
 | 1322 |                     /* close but no cigar -- try again */ | 
 | 1323 |                     if (++ptr >= end) | 
 | 1324 |                         return 0; | 
 | 1325 |                 } | 
 | 1326 |                 i = overlap[i]; | 
 | 1327 |             } while (i != 0); | 
 | 1328 |         } | 
 | 1329 |         return 0; | 
 | 1330 |     } | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1331 |  | 
| Serhiy Storchaka | 66dc464 | 2015-06-21 14:06:55 +0300 | [diff] [blame] | 1332 |     if (charset) { | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1333 |         /* pattern starts with a character from a known set */ | 
 | 1334 |         end = (SRE_CHAR *)state->end; | 
 | 1335 |         for (;;) { | 
| Serhiy Storchaka | 4b8f894 | 2014-10-31 12:36:56 +0200 | [diff] [blame] | 1336 |             while (ptr < end && !SRE(charset)(state, charset, *ptr)) | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1337 |                 ptr++; | 
 | 1338 |             if (ptr >= end) | 
 | 1339 |                 return 0; | 
 | 1340 |             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); | 
 | 1341 |             state->start = ptr; | 
 | 1342 |             state->ptr = ptr; | 
| Serhiy Storchaka | 429b59e | 2014-05-14 21:48:17 +0300 | [diff] [blame] | 1343 |             status = SRE(match)(state, pattern, 0); | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1344 |             if (status != 0) | 
 | 1345 |                 break; | 
 | 1346 |             ptr++; | 
 | 1347 |         } | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 1348 |     } else { | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1349 |         /* general case */ | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 1350 |         assert(ptr <= end); | 
 | 1351 |         while (1) { | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1352 |             TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 1353 |             state->start = state->ptr = ptr; | 
| Serhiy Storchaka | 429b59e | 2014-05-14 21:48:17 +0300 | [diff] [blame] | 1354 |             status = SRE(match)(state, pattern, 0); | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 1355 |             if (status != 0 || ptr >= end) | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1356 |                 break; | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 1357 |             ptr++; | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1358 |         } | 
| Serhiy Storchaka | 03d6ee3 | 2015-07-06 13:58:33 +0300 | [diff] [blame] | 1359 |     } | 
| Serhiy Storchaka | 8444ebb | 2013-10-26 11:18:42 +0300 | [diff] [blame] | 1360 |  | 
 | 1361 |     return status; | 
 | 1362 | } | 
 | 1363 |  | 
 | 1364 | #undef SRE_CHAR | 
 | 1365 | #undef SIZEOF_SRE_CHAR | 
 | 1366 | #undef SRE | 
 | 1367 |  | 
 | 1368 | /* vim:ts=4:sw=4:et | 
 | 1369 | */ |