blob: 0865fc63a004d57daba2347470ca2f09d0f17d34 [file] [log] [blame]
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001/*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
7 *
8 * See the _sre.c file for information on usage and redistribution.
9 */
10
11/* String matching engine */
12
13/* This file is included three times, with different character settings */
14
15LOCAL(int)
16SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
17{
18 /* check if pointer is at given position */
19
20 Py_ssize_t thisp, thatp;
21
22 switch (at) {
23
24 case SRE_AT_BEGINNING:
25 case SRE_AT_BEGINNING_STRING:
26 return ((void*) ptr == state->beginning);
27
28 case SRE_AT_BEGINNING_LINE:
29 return ((void*) ptr == state->beginning ||
30 SRE_IS_LINEBREAK((int) ptr[-1]));
31
32 case SRE_AT_END:
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +030033 return (((SRE_CHAR *)state->end - ptr == 1 &&
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +030034 SRE_IS_LINEBREAK((int) ptr[0])) ||
35 ((void*) ptr == state->end));
36
37 case SRE_AT_END_LINE:
38 return ((void*) ptr == state->end ||
39 SRE_IS_LINEBREAK((int) ptr[0]));
40
41 case SRE_AT_END_STRING:
42 return ((void*) ptr == state->end);
43
44 case SRE_AT_BOUNDARY:
45 if (state->beginning == state->end)
46 return 0;
47 thatp = ((void*) ptr > state->beginning) ?
48 SRE_IS_WORD((int) ptr[-1]) : 0;
49 thisp = ((void*) ptr < state->end) ?
50 SRE_IS_WORD((int) ptr[0]) : 0;
51 return thisp != thatp;
52
53 case SRE_AT_NON_BOUNDARY:
54 if (state->beginning == state->end)
55 return 0;
56 thatp = ((void*) ptr > state->beginning) ?
57 SRE_IS_WORD((int) ptr[-1]) : 0;
58 thisp = ((void*) ptr < state->end) ?
59 SRE_IS_WORD((int) ptr[0]) : 0;
60 return thisp == thatp;
61
62 case SRE_AT_LOC_BOUNDARY:
63 if (state->beginning == state->end)
64 return 0;
65 thatp = ((void*) ptr > state->beginning) ?
66 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67 thisp = ((void*) ptr < state->end) ?
68 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69 return thisp != thatp;
70
71 case SRE_AT_LOC_NON_BOUNDARY:
72 if (state->beginning == state->end)
73 return 0;
74 thatp = ((void*) ptr > state->beginning) ?
75 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76 thisp = ((void*) ptr < state->end) ?
77 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78 return thisp == thatp;
79
80 case SRE_AT_UNI_BOUNDARY:
81 if (state->beginning == state->end)
82 return 0;
83 thatp = ((void*) ptr > state->beginning) ?
84 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85 thisp = ((void*) ptr < state->end) ?
86 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87 return thisp != thatp;
88
89 case SRE_AT_UNI_NON_BOUNDARY:
90 if (state->beginning == state->end)
91 return 0;
92 thatp = ((void*) ptr > state->beginning) ?
93 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94 thisp = ((void*) ptr < state->end) ?
95 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96 return thisp == thatp;
97
98 }
99
100 return 0;
101}
102
103LOCAL(int)
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200104SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300105{
106 /* check if character is a member of the given set */
107
108 int ok = 1;
109
110 for (;;) {
111 switch (*set++) {
112
113 case SRE_OP_FAILURE:
114 return !ok;
115
116 case SRE_OP_LITERAL:
117 /* <LITERAL> <code> */
118 if (ch == set[0])
119 return ok;
120 set++;
121 break;
122
123 case SRE_OP_CATEGORY:
124 /* <CATEGORY> <code> */
125 if (sre_category(set[0], (int) ch))
126 return ok;
127 set++;
128 break;
129
130 case SRE_OP_CHARSET:
131 /* <CHARSET> <bitmap> */
132 if (ch < 256 &&
133 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134 return ok;
135 set += 256/SRE_CODE_BITS;
136 break;
137
138 case SRE_OP_RANGE:
139 /* <RANGE> <lower> <upper> */
140 if (set[0] <= ch && ch <= set[1])
141 return ok;
142 set += 2;
143 break;
144
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200145 case SRE_OP_RANGE_IGNORE:
146 /* <RANGE_IGNORE> <lower> <upper> */
147 {
148 SRE_CODE uch;
149 /* ch is already lower cased */
150 if (set[0] <= ch && ch <= set[1])
151 return ok;
152 uch = state->upper(ch);
153 if (set[0] <= uch && uch <= set[1])
154 return ok;
155 set += 2;
156 break;
157 }
158
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300159 case SRE_OP_NEGATE:
160 ok = !ok;
161 break;
162
163 case SRE_OP_BIGCHARSET:
164 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
165 {
166 Py_ssize_t count, block;
167 count = *(set++);
168
169 if (ch < 0x10000u)
170 block = ((unsigned char*)set)[ch >> 8];
171 else
172 block = -1;
173 set += 256/sizeof(SRE_CODE);
174 if (block >=0 &&
175 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
176 (1u << (ch & (SRE_CODE_BITS-1)))))
177 return ok;
178 set += count * (256/SRE_CODE_BITS);
179 break;
180 }
181
182 default:
183 /* internal error -- there's not much we can do about it
184 here, so let's just pretend it didn't match... */
185 return 0;
186 }
187 }
188}
189
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300190LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300191
192LOCAL(Py_ssize_t)
193SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
194{
195 SRE_CODE chr;
196 SRE_CHAR c;
197 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
198 SRE_CHAR* end = (SRE_CHAR *)state->end;
199 Py_ssize_t i;
200
201 /* adjust end */
202 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
203 end = ptr + maxcount;
204
205 switch (pattern[0]) {
206
207 case SRE_OP_IN:
208 /* repeated set */
209 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200210 while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300211 ptr++;
212 break;
213
214 case SRE_OP_ANY:
215 /* repeated dot wildcard. */
216 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
217 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
218 ptr++;
219 break;
220
221 case SRE_OP_ANY_ALL:
222 /* repeated dot wildcard. skip to the end of the target
223 string, and backtrack from there */
224 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
225 ptr = end;
226 break;
227
228 case SRE_OP_LITERAL:
229 /* repeated literal */
230 chr = pattern[1];
231 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
232 c = (SRE_CHAR) chr;
233#if SIZEOF_SRE_CHAR < 4
234 if ((SRE_CODE) c != chr)
235 ; /* literal can't match: doesn't fit in char width */
236 else
237#endif
238 while (ptr < end && *ptr == c)
239 ptr++;
240 break;
241
242 case SRE_OP_LITERAL_IGNORE:
243 /* repeated literal */
244 chr = pattern[1];
245 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
246 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
247 ptr++;
248 break;
249
250 case SRE_OP_NOT_LITERAL:
251 /* repeated non-literal */
252 chr = pattern[1];
253 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
254 c = (SRE_CHAR) chr;
255#if SIZEOF_SRE_CHAR < 4
256 if ((SRE_CODE) c != chr)
257 ptr = end; /* literal can't match: doesn't fit in char width */
258 else
259#endif
260 while (ptr < end && *ptr != c)
261 ptr++;
262 break;
263
264 case SRE_OP_NOT_LITERAL_IGNORE:
265 /* repeated non-literal */
266 chr = pattern[1];
267 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
268 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
269 ptr++;
270 break;
271
272 default:
273 /* repeated single character pattern */
274 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
275 while ((SRE_CHAR*) state->ptr < end) {
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300276 i = SRE(match)(state, pattern, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300277 if (i < 0)
278 return i;
279 if (!i)
280 break;
281 }
282 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
283 (SRE_CHAR*) state->ptr - ptr));
284 return (SRE_CHAR*) state->ptr - ptr;
285 }
286
287 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
288 ptr - (SRE_CHAR*) state->ptr));
289 return ptr - (SRE_CHAR*) state->ptr;
290}
291
292#if 0 /* not used in this release */
293LOCAL(int)
294SRE(info)(SRE_STATE* state, SRE_CODE* pattern)
295{
296 /* check if an SRE_OP_INFO block matches at the current position.
297 returns the number of SRE_CODE objects to skip if successful, 0
298 if no match */
299
300 SRE_CHAR* end = (SRE_CHAR*) state->end;
301 SRE_CHAR* ptr = (SRE_CHAR*) state->ptr;
302 Py_ssize_t i;
303
304 /* check minimal length */
305 if (pattern[3] && end - ptr < pattern[3])
306 return 0;
307
308 /* check known prefix */
309 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
310 /* <length> <skip> <prefix data> <overlap data> */
311 for (i = 0; i < pattern[5]; i++)
312 if ((SRE_CODE) ptr[i] != pattern[7 + i])
313 return 0;
314 return pattern[0] + 2 * pattern[6];
315 }
316 return pattern[0];
317}
318#endif
319
320/* The macros below should be used to protect recursive SRE(match)()
321 * calls that *failed* and do *not* return immediately (IOW, those
322 * that will backtrack). Explaining:
323 *
324 * - Recursive SRE(match)() returned true: that's usually a success
325 * (besides atypical cases like ASSERT_NOT), therefore there's no
326 * reason to restore lastmark;
327 *
328 * - Recursive SRE(match)() returned false but the current SRE(match)()
329 * is returning to the caller: If the current SRE(match)() is the
330 * top function of the recursion, returning false will be a matching
331 * failure, and it doesn't matter where lastmark is pointing to.
332 * If it's *not* the top function, it will be a recursive SRE(match)()
333 * failure by itself, and the calling SRE(match)() will have to deal
334 * with the failure by the same rules explained here (it will restore
335 * lastmark by itself if necessary);
336 *
337 * - Recursive SRE(match)() returned false, and will continue the
338 * outside 'for' loop: must be protected when breaking, since the next
339 * OP could potentially depend on lastmark;
340 *
341 * - Recursive SRE(match)() returned false, and will be called again
342 * inside a local for/while loop: must be protected between each
343 * loop iteration, since the recursive SRE(match)() could do anything,
344 * and could potentially depend on lastmark.
345 *
346 * For more information, check the discussion at SF patch #712900.
347 */
348#define LASTMARK_SAVE() \
349 do { \
350 ctx->lastmark = state->lastmark; \
351 ctx->lastindex = state->lastindex; \
352 } while (0)
353#define LASTMARK_RESTORE() \
354 do { \
355 state->lastmark = ctx->lastmark; \
356 state->lastindex = ctx->lastindex; \
357 } while (0)
358
359#define RETURN_ERROR(i) do { return i; } while(0)
360#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
361#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
362
363#define RETURN_ON_ERROR(i) \
364 do { if (i < 0) RETURN_ERROR(i); } while (0)
365#define RETURN_ON_SUCCESS(i) \
366 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
367#define RETURN_ON_FAILURE(i) \
368 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
369
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300370#define DATA_STACK_ALLOC(state, type, ptr) \
371do { \
372 alloc_pos = state->data_stack_base; \
373 TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
374 "(%" PY_FORMAT_SIZE_T "d)\n", \
Serhiy Storchaka12b25382015-11-05 17:43:42 +0200375 Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300376 if (sizeof(type) > state->data_stack_size - alloc_pos) { \
377 int j = data_stack_grow(state, sizeof(type)); \
378 if (j < 0) return j; \
379 if (ctx_pos != -1) \
380 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
381 } \
382 ptr = (type*)(state->data_stack+alloc_pos); \
383 state->data_stack_base += sizeof(type); \
384} while (0)
385
386#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
387do { \
Serhiy Storchaka12b25382015-11-05 17:43:42 +0200388 TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", Py_STRINGIFY(type), pos)); \
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300389 ptr = (type*)(state->data_stack+pos); \
390} while (0)
391
392#define DATA_STACK_PUSH(state, data, size) \
393do { \
394 TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
395 "(%" PY_FORMAT_SIZE_T "d)\n", \
396 data, state->data_stack_base, size)); \
397 if (size > state->data_stack_size - state->data_stack_base) { \
398 int j = data_stack_grow(state, size); \
399 if (j < 0) return j; \
400 if (ctx_pos != -1) \
401 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
402 } \
403 memcpy(state->data_stack+state->data_stack_base, data, size); \
404 state->data_stack_base += size; \
405} while (0)
406
407#define DATA_STACK_POP(state, data, size, discard) \
408do { \
409 TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
410 "(%" PY_FORMAT_SIZE_T "d)\n", \
411 data, state->data_stack_base-size, size)); \
412 memcpy(data, state->data_stack+state->data_stack_base-size, size); \
413 if (discard) \
414 state->data_stack_base -= size; \
415} while (0)
416
417#define DATA_STACK_POP_DISCARD(state, size) \
418do { \
419 TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
420 "(%" PY_FORMAT_SIZE_T "d)\n", \
421 state->data_stack_base-size, size)); \
422 state->data_stack_base -= size; \
423} while(0)
424
425#define DATA_PUSH(x) \
426 DATA_STACK_PUSH(state, (x), sizeof(*(x)))
427#define DATA_POP(x) \
428 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
429#define DATA_POP_DISCARD(x) \
430 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
431#define DATA_ALLOC(t,p) \
432 DATA_STACK_ALLOC(state, t, p)
433#define DATA_LOOKUP_AT(t,p,pos) \
434 DATA_STACK_LOOKUP_AT(state,t,p,pos)
435
436#define MARK_PUSH(lastmark) \
437 do if (lastmark > 0) { \
438 i = lastmark; /* ctx->lastmark may change if reallocated */ \
439 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
440 } while (0)
441#define MARK_POP(lastmark) \
442 do if (lastmark > 0) { \
443 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
444 } while (0)
445#define MARK_POP_KEEP(lastmark) \
446 do if (lastmark > 0) { \
447 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
448 } while (0)
449#define MARK_POP_DISCARD(lastmark) \
450 do if (lastmark > 0) { \
451 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
452 } while (0)
453
454#define JUMP_NONE 0
455#define JUMP_MAX_UNTIL_1 1
456#define JUMP_MAX_UNTIL_2 2
457#define JUMP_MAX_UNTIL_3 3
458#define JUMP_MIN_UNTIL_1 4
459#define JUMP_MIN_UNTIL_2 5
460#define JUMP_MIN_UNTIL_3 6
461#define JUMP_REPEAT 7
462#define JUMP_REPEAT_ONE_1 8
463#define JUMP_REPEAT_ONE_2 9
464#define JUMP_MIN_REPEAT_ONE 10
465#define JUMP_BRANCH 11
466#define JUMP_ASSERT 12
467#define JUMP_ASSERT_NOT 13
468
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200469#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) \
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300470 DATA_ALLOC(SRE(match_context), nextctx); \
471 nextctx->last_ctx_pos = ctx_pos; \
472 nextctx->jump = jumpvalue; \
473 nextctx->pattern = nextpattern; \
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200474 nextctx->match_all = matchall; \
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300475 ctx_pos = alloc_pos; \
476 ctx = nextctx; \
477 goto entrance; \
478 jumplabel: \
479 while (0) /* gcc doesn't like labels at end of scopes */ \
480
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200481#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
482 DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->match_all)
483
484#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
485 DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
486
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300487typedef struct {
488 Py_ssize_t last_ctx_pos;
489 Py_ssize_t jump;
490 SRE_CHAR* ptr;
491 SRE_CODE* pattern;
492 Py_ssize_t count;
493 Py_ssize_t lastmark;
494 Py_ssize_t lastindex;
495 union {
496 SRE_CODE chr;
497 SRE_REPEAT* rep;
498 } u;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200499 int match_all;
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300500} SRE(match_context);
501
502/* check if string matches the given pattern. returns <0 for
503 error, 0 for failure, and 1 for success */
504LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300505SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300506{
507 SRE_CHAR* end = (SRE_CHAR *)state->end;
508 Py_ssize_t alloc_pos, ctx_pos = -1;
509 Py_ssize_t i, ret = 0;
510 Py_ssize_t jump;
511 unsigned int sigcount=0;
512
513 SRE(match_context)* ctx;
514 SRE(match_context)* nextctx;
515
516 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
517
518 DATA_ALLOC(SRE(match_context), ctx);
519 ctx->last_ctx_pos = -1;
520 ctx->jump = JUMP_NONE;
521 ctx->pattern = pattern;
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300522 ctx->match_all = match_all;
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300523 ctx_pos = alloc_pos;
524
525entrance:
526
527 ctx->ptr = (SRE_CHAR *)state->ptr;
528
529 if (ctx->pattern[0] == SRE_OP_INFO) {
530 /* optimization info block */
531 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Benjamin Petersonca470632016-09-06 13:47:26 -0700532 if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300533 TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
534 "need %" PY_FORMAT_SIZE_T "d)\n",
535 end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
536 RETURN_FAILURE;
537 }
538 ctx->pattern += ctx->pattern[1] + 1;
539 }
540
541 for (;;) {
542 ++sigcount;
543 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
544 RETURN_ERROR(SRE_ERROR_INTERRUPTED);
545
546 switch (*ctx->pattern++) {
547
548 case SRE_OP_MARK:
549 /* set mark */
550 /* <MARK> <gid> */
551 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
552 ctx->ptr, ctx->pattern[0]));
553 i = ctx->pattern[0];
554 if (i & 1)
555 state->lastindex = i/2 + 1;
556 if (i > state->lastmark) {
557 /* state->lastmark is the highest valid index in the
558 state->mark array. If it is increased by more than 1,
559 the intervening marks must be set to NULL to signal
560 that these marks have not been encountered. */
561 Py_ssize_t j = state->lastmark + 1;
562 while (j < i)
563 state->mark[j++] = NULL;
564 state->lastmark = i;
565 }
566 state->mark[i] = ctx->ptr;
567 ctx->pattern++;
568 break;
569
570 case SRE_OP_LITERAL:
571 /* match literal string */
572 /* <LITERAL> <code> */
573 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
574 ctx->ptr, *ctx->pattern));
575 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
576 RETURN_FAILURE;
577 ctx->pattern++;
578 ctx->ptr++;
579 break;
580
581 case SRE_OP_NOT_LITERAL:
582 /* match anything that is not literal character */
583 /* <NOT_LITERAL> <code> */
584 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
585 ctx->ptr, *ctx->pattern));
586 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
587 RETURN_FAILURE;
588 ctx->pattern++;
589 ctx->ptr++;
590 break;
591
592 case SRE_OP_SUCCESS:
593 /* end of pattern */
594 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200595 if (!ctx->match_all || ctx->ptr == state->end) {
596 state->ptr = ctx->ptr;
597 RETURN_SUCCESS;
598 }
599 RETURN_FAILURE;
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300600
601 case SRE_OP_AT:
602 /* match at given position */
603 /* <AT> <code> */
604 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
605 if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
606 RETURN_FAILURE;
607 ctx->pattern++;
608 break;
609
610 case SRE_OP_CATEGORY:
611 /* match at given category */
612 /* <CATEGORY> <code> */
613 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
614 ctx->ptr, *ctx->pattern));
615 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
616 RETURN_FAILURE;
617 ctx->pattern++;
618 ctx->ptr++;
619 break;
620
621 case SRE_OP_ANY:
622 /* match anything (except a newline) */
623 /* <ANY> */
624 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
625 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
626 RETURN_FAILURE;
627 ctx->ptr++;
628 break;
629
630 case SRE_OP_ANY_ALL:
631 /* match anything */
632 /* <ANY_ALL> */
633 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
634 if (ctx->ptr >= end)
635 RETURN_FAILURE;
636 ctx->ptr++;
637 break;
638
639 case SRE_OP_IN:
640 /* match set member (or non_member) */
641 /* <IN> <skip> <set> */
642 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200643 if (ctx->ptr >= end ||
644 !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr))
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300645 RETURN_FAILURE;
646 ctx->pattern += ctx->pattern[0];
647 ctx->ptr++;
648 break;
649
650 case SRE_OP_LITERAL_IGNORE:
651 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
652 ctx->pattern, ctx->ptr, ctx->pattern[0]));
653 if (ctx->ptr >= end ||
654 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
655 RETURN_FAILURE;
656 ctx->pattern++;
657 ctx->ptr++;
658 break;
659
660 case SRE_OP_NOT_LITERAL_IGNORE:
661 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
662 ctx->pattern, ctx->ptr, *ctx->pattern));
663 if (ctx->ptr >= end ||
664 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
665 RETURN_FAILURE;
666 ctx->pattern++;
667 ctx->ptr++;
668 break;
669
670 case SRE_OP_IN_IGNORE:
671 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
672 if (ctx->ptr >= end
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200673 || !SRE(charset)(state, ctx->pattern+1,
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300674 (SRE_CODE)state->lower(*ctx->ptr)))
675 RETURN_FAILURE;
676 ctx->pattern += ctx->pattern[0];
677 ctx->ptr++;
678 break;
679
680 case SRE_OP_JUMP:
681 case SRE_OP_INFO:
682 /* jump forward */
683 /* <JUMP> <offset> */
684 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
685 ctx->ptr, ctx->pattern[0]));
686 ctx->pattern += ctx->pattern[0];
687 break;
688
689 case SRE_OP_BRANCH:
690 /* alternation */
691 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
692 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
693 LASTMARK_SAVE();
694 ctx->u.rep = state->repeat;
695 if (ctx->u.rep)
696 MARK_PUSH(ctx->lastmark);
697 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
698 if (ctx->pattern[1] == SRE_OP_LITERAL &&
699 (ctx->ptr >= end ||
700 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
701 continue;
702 if (ctx->pattern[1] == SRE_OP_IN &&
703 (ctx->ptr >= end ||
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200704 !SRE(charset)(state, ctx->pattern + 3,
705 (SRE_CODE) *ctx->ptr)))
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300706 continue;
707 state->ptr = ctx->ptr;
708 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
709 if (ret) {
710 if (ctx->u.rep)
711 MARK_POP_DISCARD(ctx->lastmark);
712 RETURN_ON_ERROR(ret);
713 RETURN_SUCCESS;
714 }
715 if (ctx->u.rep)
716 MARK_POP_KEEP(ctx->lastmark);
717 LASTMARK_RESTORE();
718 }
719 if (ctx->u.rep)
720 MARK_POP_DISCARD(ctx->lastmark);
721 RETURN_FAILURE;
722
723 case SRE_OP_REPEAT_ONE:
724 /* match repeated sequence (maximizing regexp) */
725
726 /* this operator only works if the repeated item is
727 exactly one character wide, and we're not already
728 collecting backtracking points. for other cases,
729 use the MAX_REPEAT operator */
730
731 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
732
733 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
734 ctx->pattern[1], ctx->pattern[2]));
735
736 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
737 RETURN_FAILURE; /* cannot match */
738
739 state->ptr = ctx->ptr;
740
741 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
742 RETURN_ON_ERROR(ret);
743 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
744 ctx->count = ret;
745 ctx->ptr += ctx->count;
746
747 /* when we arrive here, count contains the number of
748 matches, and ctx->ptr points to the tail of the target
749 string. check if the rest of the pattern matches,
750 and backtrack if not. */
751
752 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
753 RETURN_FAILURE;
754
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200755 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300756 ctx->ptr == state->end) {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300757 /* tail is empty. we're finished */
758 state->ptr = ctx->ptr;
759 RETURN_SUCCESS;
760 }
761
762 LASTMARK_SAVE();
763
764 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
765 /* tail starts with a literal. skip positions where
766 the rest of the pattern cannot possibly match */
767 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
768 for (;;) {
769 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
770 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
771 ctx->ptr--;
772 ctx->count--;
773 }
774 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
775 break;
776 state->ptr = ctx->ptr;
777 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
778 ctx->pattern+ctx->pattern[0]);
779 if (ret) {
780 RETURN_ON_ERROR(ret);
781 RETURN_SUCCESS;
782 }
783
784 LASTMARK_RESTORE();
785
786 ctx->ptr--;
787 ctx->count--;
788 }
789
790 } else {
791 /* general case */
792 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
793 state->ptr = ctx->ptr;
794 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
795 ctx->pattern+ctx->pattern[0]);
796 if (ret) {
797 RETURN_ON_ERROR(ret);
798 RETURN_SUCCESS;
799 }
800 ctx->ptr--;
801 ctx->count--;
802 LASTMARK_RESTORE();
803 }
804 }
805 RETURN_FAILURE;
806
807 case SRE_OP_MIN_REPEAT_ONE:
808 /* match repeated sequence (minimizing regexp) */
809
810 /* this operator only works if the repeated item is
811 exactly one character wide, and we're not already
812 collecting backtracking points. for other cases,
813 use the MIN_REPEAT operator */
814
815 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
816
817 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
818 ctx->pattern[1], ctx->pattern[2]));
819
820 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
821 RETURN_FAILURE; /* cannot match */
822
823 state->ptr = ctx->ptr;
824
825 if (ctx->pattern[1] == 0)
826 ctx->count = 0;
827 else {
828 /* count using pattern min as the maximum */
829 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
830 RETURN_ON_ERROR(ret);
831 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
832 if (ret < (Py_ssize_t) ctx->pattern[1])
833 /* didn't match minimum number of times */
834 RETURN_FAILURE;
835 /* advance past minimum matches of repeat */
836 ctx->count = ret;
837 ctx->ptr += ctx->count;
838 }
839
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200840 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300841 (!match_all || ctx->ptr == state->end)) {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300842 /* tail is empty. we're finished */
843 state->ptr = ctx->ptr;
844 RETURN_SUCCESS;
845
846 } else {
847 /* general case */
848 LASTMARK_SAVE();
849 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
850 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
851 state->ptr = ctx->ptr;
852 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
853 ctx->pattern+ctx->pattern[0]);
854 if (ret) {
855 RETURN_ON_ERROR(ret);
856 RETURN_SUCCESS;
857 }
858 state->ptr = ctx->ptr;
859 ret = SRE(count)(state, ctx->pattern+3, 1);
860 RETURN_ON_ERROR(ret);
861 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
862 if (ret == 0)
863 break;
864 assert(ret == 1);
865 ctx->ptr++;
866 ctx->count++;
867 LASTMARK_RESTORE();
868 }
869 }
870 RETURN_FAILURE;
871
872 case SRE_OP_REPEAT:
873 /* create repeat context. all the hard work is done
874 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
875 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
876 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
877 ctx->pattern[1], ctx->pattern[2]));
878
879 /* install new repeat context */
880 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
881 if (!ctx->u.rep) {
882 PyErr_NoMemory();
883 RETURN_FAILURE;
884 }
885 ctx->u.rep->count = -1;
886 ctx->u.rep->pattern = ctx->pattern;
887 ctx->u.rep->prev = state->repeat;
888 ctx->u.rep->last_ptr = NULL;
889 state->repeat = ctx->u.rep;
890
891 state->ptr = ctx->ptr;
892 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
893 state->repeat = ctx->u.rep->prev;
894 PyObject_FREE(ctx->u.rep);
895
896 if (ret) {
897 RETURN_ON_ERROR(ret);
898 RETURN_SUCCESS;
899 }
900 RETURN_FAILURE;
901
902 case SRE_OP_MAX_UNTIL:
903 /* maximizing repeat */
904 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
905
906 /* FIXME: we probably need to deal with zero-width
907 matches in here... */
908
909 ctx->u.rep = state->repeat;
910 if (!ctx->u.rep)
911 RETURN_ERROR(SRE_ERROR_STATE);
912
913 state->ptr = ctx->ptr;
914
915 ctx->count = ctx->u.rep->count+1;
916
917 TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
918 ctx->ptr, ctx->count));
919
920 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
921 /* not enough matches */
922 ctx->u.rep->count = ctx->count;
923 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
924 ctx->u.rep->pattern+3);
925 if (ret) {
926 RETURN_ON_ERROR(ret);
927 RETURN_SUCCESS;
928 }
929 ctx->u.rep->count = ctx->count-1;
930 state->ptr = ctx->ptr;
931 RETURN_FAILURE;
932 }
933
934 if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
935 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
936 state->ptr != ctx->u.rep->last_ptr) {
937 /* we may have enough matches, but if we can
938 match another item, do so */
939 ctx->u.rep->count = ctx->count;
940 LASTMARK_SAVE();
941 MARK_PUSH(ctx->lastmark);
942 /* zero-width match protection */
943 DATA_PUSH(&ctx->u.rep->last_ptr);
944 ctx->u.rep->last_ptr = state->ptr;
945 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
946 ctx->u.rep->pattern+3);
947 DATA_POP(&ctx->u.rep->last_ptr);
948 if (ret) {
949 MARK_POP_DISCARD(ctx->lastmark);
950 RETURN_ON_ERROR(ret);
951 RETURN_SUCCESS;
952 }
953 MARK_POP(ctx->lastmark);
954 LASTMARK_RESTORE();
955 ctx->u.rep->count = ctx->count-1;
956 state->ptr = ctx->ptr;
957 }
958
959 /* cannot match more repeated items here. make sure the
960 tail matches */
961 state->repeat = ctx->u.rep->prev;
962 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
963 RETURN_ON_SUCCESS(ret);
964 state->repeat = ctx->u.rep;
965 state->ptr = ctx->ptr;
966 RETURN_FAILURE;
967
968 case SRE_OP_MIN_UNTIL:
969 /* minimizing repeat */
970 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
971
972 ctx->u.rep = state->repeat;
973 if (!ctx->u.rep)
974 RETURN_ERROR(SRE_ERROR_STATE);
975
976 state->ptr = ctx->ptr;
977
978 ctx->count = ctx->u.rep->count+1;
979
980 TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
981 ctx->ptr, ctx->count, ctx->u.rep->pattern));
982
983 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
984 /* not enough matches */
985 ctx->u.rep->count = ctx->count;
986 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
987 ctx->u.rep->pattern+3);
988 if (ret) {
989 RETURN_ON_ERROR(ret);
990 RETURN_SUCCESS;
991 }
992 ctx->u.rep->count = ctx->count-1;
993 state->ptr = ctx->ptr;
994 RETURN_FAILURE;
995 }
996
997 LASTMARK_SAVE();
998
999 /* see if the tail matches */
1000 state->repeat = ctx->u.rep->prev;
1001 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1002 if (ret) {
1003 RETURN_ON_ERROR(ret);
1004 RETURN_SUCCESS;
1005 }
1006
1007 state->repeat = ctx->u.rep;
1008 state->ptr = ctx->ptr;
1009
1010 LASTMARK_RESTORE();
1011
1012 if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1013 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1014 state->ptr == ctx->u.rep->last_ptr)
1015 RETURN_FAILURE;
1016
1017 ctx->u.rep->count = ctx->count;
1018 /* zero-width match protection */
1019 DATA_PUSH(&ctx->u.rep->last_ptr);
1020 ctx->u.rep->last_ptr = state->ptr;
1021 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1022 ctx->u.rep->pattern+3);
1023 DATA_POP(&ctx->u.rep->last_ptr);
1024 if (ret) {
1025 RETURN_ON_ERROR(ret);
1026 RETURN_SUCCESS;
1027 }
1028 ctx->u.rep->count = ctx->count-1;
1029 state->ptr = ctx->ptr;
1030 RETURN_FAILURE;
1031
1032 case SRE_OP_GROUPREF:
1033 /* match backreference */
1034 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1035 ctx->ptr, ctx->pattern[0]));
1036 i = ctx->pattern[0];
1037 {
1038 Py_ssize_t groupref = i+i;
1039 if (groupref >= state->lastmark) {
1040 RETURN_FAILURE;
1041 } else {
1042 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1043 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1044 if (!p || !e || e < p)
1045 RETURN_FAILURE;
1046 while (p < e) {
1047 if (ctx->ptr >= end || *ctx->ptr != *p)
1048 RETURN_FAILURE;
1049 p++;
1050 ctx->ptr++;
1051 }
1052 }
1053 }
1054 ctx->pattern++;
1055 break;
1056
1057 case SRE_OP_GROUPREF_IGNORE:
1058 /* match backreference */
1059 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1060 ctx->ptr, ctx->pattern[0]));
1061 i = ctx->pattern[0];
1062 {
1063 Py_ssize_t groupref = i+i;
1064 if (groupref >= state->lastmark) {
1065 RETURN_FAILURE;
1066 } else {
1067 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1068 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1069 if (!p || !e || e < p)
1070 RETURN_FAILURE;
1071 while (p < e) {
1072 if (ctx->ptr >= end ||
1073 state->lower(*ctx->ptr) != state->lower(*p))
1074 RETURN_FAILURE;
1075 p++;
1076 ctx->ptr++;
1077 }
1078 }
1079 }
1080 ctx->pattern++;
1081 break;
1082
1083 case SRE_OP_GROUPREF_EXISTS:
1084 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1085 ctx->ptr, ctx->pattern[0]));
1086 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1087 i = ctx->pattern[0];
1088 {
1089 Py_ssize_t groupref = i+i;
1090 if (groupref >= state->lastmark) {
1091 ctx->pattern += ctx->pattern[1];
1092 break;
1093 } else {
1094 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1095 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1096 if (!p || !e || e < p) {
1097 ctx->pattern += ctx->pattern[1];
1098 break;
1099 }
1100 }
1101 }
1102 ctx->pattern += 2;
1103 break;
1104
1105 case SRE_OP_ASSERT:
1106 /* assert subpattern */
1107 /* <ASSERT> <skip> <back> <pattern> */
1108 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1109 ctx->ptr, ctx->pattern[1]));
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001110 if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001111 RETURN_FAILURE;
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001112 state->ptr = ctx->ptr - ctx->pattern[1];
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001113 DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001114 RETURN_ON_FAILURE(ret);
1115 ctx->pattern += ctx->pattern[0];
1116 break;
1117
1118 case SRE_OP_ASSERT_NOT:
1119 /* assert not subpattern */
1120 /* <ASSERT_NOT> <skip> <back> <pattern> */
1121 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1122 ctx->ptr, ctx->pattern[1]));
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001123 if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
1124 state->ptr = ctx->ptr - ctx->pattern[1];
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001125 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001126 if (ret) {
1127 RETURN_ON_ERROR(ret);
1128 RETURN_FAILURE;
1129 }
1130 }
1131 ctx->pattern += ctx->pattern[0];
1132 break;
1133
1134 case SRE_OP_FAILURE:
1135 /* immediate failure */
1136 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1137 RETURN_FAILURE;
1138
1139 default:
1140 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1141 ctx->pattern[-1]));
1142 RETURN_ERROR(SRE_ERROR_ILLEGAL);
1143 }
1144 }
1145
1146exit:
1147 ctx_pos = ctx->last_ctx_pos;
1148 jump = ctx->jump;
1149 DATA_POP_DISCARD(ctx);
1150 if (ctx_pos == -1)
1151 return ret;
1152 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1153
1154 switch (jump) {
1155 case JUMP_MAX_UNTIL_2:
1156 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1157 goto jump_max_until_2;
1158 case JUMP_MAX_UNTIL_3:
1159 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1160 goto jump_max_until_3;
1161 case JUMP_MIN_UNTIL_2:
1162 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1163 goto jump_min_until_2;
1164 case JUMP_MIN_UNTIL_3:
1165 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1166 goto jump_min_until_3;
1167 case JUMP_BRANCH:
1168 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1169 goto jump_branch;
1170 case JUMP_MAX_UNTIL_1:
1171 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1172 goto jump_max_until_1;
1173 case JUMP_MIN_UNTIL_1:
1174 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1175 goto jump_min_until_1;
1176 case JUMP_REPEAT:
1177 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1178 goto jump_repeat;
1179 case JUMP_REPEAT_ONE_1:
1180 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1181 goto jump_repeat_one_1;
1182 case JUMP_REPEAT_ONE_2:
1183 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1184 goto jump_repeat_one_2;
1185 case JUMP_MIN_REPEAT_ONE:
1186 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1187 goto jump_min_repeat_one;
1188 case JUMP_ASSERT:
1189 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1190 goto jump_assert;
1191 case JUMP_ASSERT_NOT:
1192 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1193 goto jump_assert_not;
1194 case JUMP_NONE:
1195 TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1196 ctx->ptr, ret));
1197 break;
1198 }
1199
1200 return ret; /* should never get here */
1201}
1202
1203LOCAL(Py_ssize_t)
1204SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1205{
1206 SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1207 SRE_CHAR* end = (SRE_CHAR *)state->end;
1208 Py_ssize_t status = 0;
1209 Py_ssize_t prefix_len = 0;
1210 Py_ssize_t prefix_skip = 0;
1211 SRE_CODE* prefix = NULL;
1212 SRE_CODE* charset = NULL;
1213 SRE_CODE* overlap = NULL;
1214 int flags = 0;
1215
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001216 if (ptr > end)
1217 return 0;
1218
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001219 if (pattern[0] == SRE_OP_INFO) {
1220 /* optimization info block */
1221 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1222
1223 flags = pattern[2];
1224
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001225 if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1226 TRACE(("reject (got %u chars, need %u)\n",
1227 (unsigned int)(end - ptr), pattern[3]));
1228 return 0;
1229 }
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001230 if (pattern[3] > 1) {
1231 /* adjust end point (but make sure we leave at least one
1232 character in there, so literal search will work) */
1233 end -= pattern[3] - 1;
1234 if (end <= ptr)
1235 end = ptr;
1236 }
1237
1238 if (flags & SRE_INFO_PREFIX) {
1239 /* pattern starts with a known prefix */
1240 /* <length> <skip> <prefix data> <overlap data> */
1241 prefix_len = pattern[5];
1242 prefix_skip = pattern[6];
1243 prefix = pattern + 7;
1244 overlap = prefix + prefix_len - 1;
1245 } else if (flags & SRE_INFO_CHARSET)
1246 /* pattern starts with a character from a known set */
1247 /* <charset> */
1248 charset = pattern + 5;
1249
1250 pattern += 1 + pattern[1];
1251 }
1252
1253 TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1254 prefix, prefix_len, prefix_skip));
1255 TRACE(("charset = %p\n", charset));
1256
Serhiy Storchaka66dc4642015-06-21 14:06:55 +03001257 if (prefix_len == 1) {
1258 /* pattern starts with a literal character */
1259 SRE_CHAR c = (SRE_CHAR) prefix[0];
1260#if SIZEOF_SRE_CHAR < 4
1261 if ((SRE_CODE) c != prefix[0])
1262 return 0; /* literal can't match: doesn't fit in char width */
1263#endif
1264 end = (SRE_CHAR *)state->end;
1265 while (ptr < end) {
1266 while (*ptr != c) {
1267 if (++ptr >= end)
1268 return 0;
1269 }
1270 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1271 state->start = ptr;
1272 state->ptr = ptr + prefix_skip;
1273 if (flags & SRE_INFO_LITERAL)
1274 return 1; /* we got all of it */
1275 status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1276 if (status != 0)
1277 return status;
1278 ++ptr;
1279 }
1280 return 0;
1281 }
1282
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001283 if (prefix_len > 1) {
1284 /* pattern starts with a known prefix. use the overlap
1285 table to skip forward as fast as we possibly can */
1286 Py_ssize_t i = 0;
1287
1288 end = (SRE_CHAR *)state->end;
1289 if (prefix_len > end - ptr)
1290 return 0;
1291#if SIZEOF_SRE_CHAR < 4
1292 for (i = 0; i < prefix_len; i++)
1293 if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1294 return 0; /* literal can't match: doesn't fit in char width */
1295#endif
1296 while (ptr < end) {
1297 SRE_CHAR c = (SRE_CHAR) prefix[0];
1298 while (*ptr++ != c) {
1299 if (ptr >= end)
1300 return 0;
1301 }
1302 if (ptr >= end)
1303 return 0;
1304
1305 i = 1;
1306 do {
1307 if (*ptr == (SRE_CHAR) prefix[i]) {
1308 if (++i != prefix_len) {
1309 if (++ptr >= end)
1310 return 0;
1311 continue;
1312 }
1313 /* found a potential match */
1314 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1315 state->start = ptr - (prefix_len - 1);
1316 state->ptr = ptr - (prefix_len - prefix_skip - 1);
1317 if (flags & SRE_INFO_LITERAL)
1318 return 1; /* we got all of it */
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001319 status = SRE(match)(state, pattern + 2*prefix_skip, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001320 if (status != 0)
1321 return status;
1322 /* close but no cigar -- try again */
1323 if (++ptr >= end)
1324 return 0;
1325 }
1326 i = overlap[i];
1327 } while (i != 0);
1328 }
1329 return 0;
1330 }
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001331
Serhiy Storchaka66dc4642015-06-21 14:06:55 +03001332 if (charset) {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001333 /* pattern starts with a character from a known set */
1334 end = (SRE_CHAR *)state->end;
1335 for (;;) {
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001336 while (ptr < end && !SRE(charset)(state, charset, *ptr))
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001337 ptr++;
1338 if (ptr >= end)
1339 return 0;
1340 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1341 state->start = ptr;
1342 state->ptr = ptr;
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001343 status = SRE(match)(state, pattern, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001344 if (status != 0)
1345 break;
1346 ptr++;
1347 }
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001348 } else {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001349 /* general case */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001350 assert(ptr <= end);
1351 while (1) {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001352 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001353 state->start = state->ptr = ptr;
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001354 status = SRE(match)(state, pattern, 0);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001355 if (status != 0 || ptr >= end)
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001356 break;
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001357 ptr++;
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001358 }
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001359 }
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001360
1361 return status;
1362}
1363
1364#undef SRE_CHAR
1365#undef SIZEOF_SRE_CHAR
1366#undef SRE
1367
1368/* vim:ts=4:sw=4:et
1369*/