blob: 128c71e340d51321d6d754d5686f8577a1467f76 [file] [log] [blame]
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001/*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
7 *
8 * See the _sre.c file for information on usage and redistribution.
9 */
10
11/* String matching engine */
12
13/* This file is included three times, with different character settings */
14
15LOCAL(int)
16SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
17{
18 /* check if pointer is at given position */
19
20 Py_ssize_t thisp, thatp;
21
22 switch (at) {
23
24 case SRE_AT_BEGINNING:
25 case SRE_AT_BEGINNING_STRING:
26 return ((void*) ptr == state->beginning);
27
28 case SRE_AT_BEGINNING_LINE:
29 return ((void*) ptr == state->beginning ||
30 SRE_IS_LINEBREAK((int) ptr[-1]));
31
32 case SRE_AT_END:
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +030033 return (((SRE_CHAR *)state->end - ptr == 1 &&
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +030034 SRE_IS_LINEBREAK((int) ptr[0])) ||
35 ((void*) ptr == state->end));
36
37 case SRE_AT_END_LINE:
38 return ((void*) ptr == state->end ||
39 SRE_IS_LINEBREAK((int) ptr[0]));
40
41 case SRE_AT_END_STRING:
42 return ((void*) ptr == state->end);
43
44 case SRE_AT_BOUNDARY:
45 if (state->beginning == state->end)
46 return 0;
47 thatp = ((void*) ptr > state->beginning) ?
48 SRE_IS_WORD((int) ptr[-1]) : 0;
49 thisp = ((void*) ptr < state->end) ?
50 SRE_IS_WORD((int) ptr[0]) : 0;
51 return thisp != thatp;
52
53 case SRE_AT_NON_BOUNDARY:
54 if (state->beginning == state->end)
55 return 0;
56 thatp = ((void*) ptr > state->beginning) ?
57 SRE_IS_WORD((int) ptr[-1]) : 0;
58 thisp = ((void*) ptr < state->end) ?
59 SRE_IS_WORD((int) ptr[0]) : 0;
60 return thisp == thatp;
61
62 case SRE_AT_LOC_BOUNDARY:
63 if (state->beginning == state->end)
64 return 0;
65 thatp = ((void*) ptr > state->beginning) ?
66 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67 thisp = ((void*) ptr < state->end) ?
68 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69 return thisp != thatp;
70
71 case SRE_AT_LOC_NON_BOUNDARY:
72 if (state->beginning == state->end)
73 return 0;
74 thatp = ((void*) ptr > state->beginning) ?
75 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76 thisp = ((void*) ptr < state->end) ?
77 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78 return thisp == thatp;
79
80 case SRE_AT_UNI_BOUNDARY:
81 if (state->beginning == state->end)
82 return 0;
83 thatp = ((void*) ptr > state->beginning) ?
84 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85 thisp = ((void*) ptr < state->end) ?
86 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87 return thisp != thatp;
88
89 case SRE_AT_UNI_NON_BOUNDARY:
90 if (state->beginning == state->end)
91 return 0;
92 thatp = ((void*) ptr > state->beginning) ?
93 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94 thisp = ((void*) ptr < state->end) ?
95 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96 return thisp == thatp;
97
98 }
99
100 return 0;
101}
102
103LOCAL(int)
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200104SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300105{
106 /* check if character is a member of the given set */
107
108 int ok = 1;
109
110 for (;;) {
111 switch (*set++) {
112
113 case SRE_OP_FAILURE:
114 return !ok;
115
116 case SRE_OP_LITERAL:
117 /* <LITERAL> <code> */
118 if (ch == set[0])
119 return ok;
120 set++;
121 break;
122
123 case SRE_OP_CATEGORY:
124 /* <CATEGORY> <code> */
125 if (sre_category(set[0], (int) ch))
126 return ok;
127 set++;
128 break;
129
130 case SRE_OP_CHARSET:
131 /* <CHARSET> <bitmap> */
132 if (ch < 256 &&
133 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134 return ok;
135 set += 256/SRE_CODE_BITS;
136 break;
137
138 case SRE_OP_RANGE:
139 /* <RANGE> <lower> <upper> */
140 if (set[0] <= ch && ch <= set[1])
141 return ok;
142 set += 2;
143 break;
144
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200145 case SRE_OP_RANGE_IGNORE:
146 /* <RANGE_IGNORE> <lower> <upper> */
147 {
148 SRE_CODE uch;
149 /* ch is already lower cased */
150 if (set[0] <= ch && ch <= set[1])
151 return ok;
152 uch = state->upper(ch);
153 if (set[0] <= uch && uch <= set[1])
154 return ok;
155 set += 2;
156 break;
157 }
158
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300159 case SRE_OP_NEGATE:
160 ok = !ok;
161 break;
162
163 case SRE_OP_BIGCHARSET:
164 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
165 {
166 Py_ssize_t count, block;
167 count = *(set++);
168
169 if (ch < 0x10000u)
170 block = ((unsigned char*)set)[ch >> 8];
171 else
172 block = -1;
173 set += 256/sizeof(SRE_CODE);
174 if (block >=0 &&
175 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
176 (1u << (ch & (SRE_CODE_BITS-1)))))
177 return ok;
178 set += count * (256/SRE_CODE_BITS);
179 break;
180 }
181
182 default:
183 /* internal error -- there's not much we can do about it
184 here, so let's just pretend it didn't match... */
185 return 0;
186 }
187 }
188}
189
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300190LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300191
192LOCAL(Py_ssize_t)
193SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
194{
195 SRE_CODE chr;
196 SRE_CHAR c;
197 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
198 SRE_CHAR* end = (SRE_CHAR *)state->end;
199 Py_ssize_t i;
200
201 /* adjust end */
202 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
203 end = ptr + maxcount;
204
205 switch (pattern[0]) {
206
207 case SRE_OP_IN:
208 /* repeated set */
209 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200210 while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300211 ptr++;
212 break;
213
214 case SRE_OP_ANY:
215 /* repeated dot wildcard. */
216 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
217 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
218 ptr++;
219 break;
220
221 case SRE_OP_ANY_ALL:
222 /* repeated dot wildcard. skip to the end of the target
223 string, and backtrack from there */
224 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
225 ptr = end;
226 break;
227
228 case SRE_OP_LITERAL:
229 /* repeated literal */
230 chr = pattern[1];
231 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
232 c = (SRE_CHAR) chr;
233#if SIZEOF_SRE_CHAR < 4
234 if ((SRE_CODE) c != chr)
235 ; /* literal can't match: doesn't fit in char width */
236 else
237#endif
238 while (ptr < end && *ptr == c)
239 ptr++;
240 break;
241
242 case SRE_OP_LITERAL_IGNORE:
243 /* repeated literal */
244 chr = pattern[1];
245 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
246 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
247 ptr++;
248 break;
249
250 case SRE_OP_NOT_LITERAL:
251 /* repeated non-literal */
252 chr = pattern[1];
253 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
254 c = (SRE_CHAR) chr;
255#if SIZEOF_SRE_CHAR < 4
256 if ((SRE_CODE) c != chr)
257 ptr = end; /* literal can't match: doesn't fit in char width */
258 else
259#endif
260 while (ptr < end && *ptr != c)
261 ptr++;
262 break;
263
264 case SRE_OP_NOT_LITERAL_IGNORE:
265 /* repeated non-literal */
266 chr = pattern[1];
267 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
268 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
269 ptr++;
270 break;
271
272 default:
273 /* repeated single character pattern */
274 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
275 while ((SRE_CHAR*) state->ptr < end) {
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300276 i = SRE(match)(state, pattern, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300277 if (i < 0)
278 return i;
279 if (!i)
280 break;
281 }
282 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
283 (SRE_CHAR*) state->ptr - ptr));
284 return (SRE_CHAR*) state->ptr - ptr;
285 }
286
287 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
288 ptr - (SRE_CHAR*) state->ptr));
289 return ptr - (SRE_CHAR*) state->ptr;
290}
291
292#if 0 /* not used in this release */
293LOCAL(int)
294SRE(info)(SRE_STATE* state, SRE_CODE* pattern)
295{
296 /* check if an SRE_OP_INFO block matches at the current position.
297 returns the number of SRE_CODE objects to skip if successful, 0
298 if no match */
299
300 SRE_CHAR* end = (SRE_CHAR*) state->end;
301 SRE_CHAR* ptr = (SRE_CHAR*) state->ptr;
302 Py_ssize_t i;
303
304 /* check minimal length */
305 if (pattern[3] && end - ptr < pattern[3])
306 return 0;
307
308 /* check known prefix */
309 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
310 /* <length> <skip> <prefix data> <overlap data> */
311 for (i = 0; i < pattern[5]; i++)
312 if ((SRE_CODE) ptr[i] != pattern[7 + i])
313 return 0;
314 return pattern[0] + 2 * pattern[6];
315 }
316 return pattern[0];
317}
318#endif
319
320/* The macros below should be used to protect recursive SRE(match)()
321 * calls that *failed* and do *not* return immediately (IOW, those
322 * that will backtrack). Explaining:
323 *
324 * - Recursive SRE(match)() returned true: that's usually a success
325 * (besides atypical cases like ASSERT_NOT), therefore there's no
326 * reason to restore lastmark;
327 *
328 * - Recursive SRE(match)() returned false but the current SRE(match)()
329 * is returning to the caller: If the current SRE(match)() is the
330 * top function of the recursion, returning false will be a matching
331 * failure, and it doesn't matter where lastmark is pointing to.
332 * If it's *not* the top function, it will be a recursive SRE(match)()
333 * failure by itself, and the calling SRE(match)() will have to deal
334 * with the failure by the same rules explained here (it will restore
335 * lastmark by itself if necessary);
336 *
337 * - Recursive SRE(match)() returned false, and will continue the
338 * outside 'for' loop: must be protected when breaking, since the next
339 * OP could potentially depend on lastmark;
340 *
341 * - Recursive SRE(match)() returned false, and will be called again
342 * inside a local for/while loop: must be protected between each
343 * loop iteration, since the recursive SRE(match)() could do anything,
344 * and could potentially depend on lastmark.
345 *
346 * For more information, check the discussion at SF patch #712900.
347 */
348#define LASTMARK_SAVE() \
349 do { \
350 ctx->lastmark = state->lastmark; \
351 ctx->lastindex = state->lastindex; \
352 } while (0)
353#define LASTMARK_RESTORE() \
354 do { \
355 state->lastmark = ctx->lastmark; \
356 state->lastindex = ctx->lastindex; \
357 } while (0)
358
359#define RETURN_ERROR(i) do { return i; } while(0)
360#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
361#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
362
363#define RETURN_ON_ERROR(i) \
364 do { if (i < 0) RETURN_ERROR(i); } while (0)
365#define RETURN_ON_SUCCESS(i) \
366 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
367#define RETURN_ON_FAILURE(i) \
368 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
369
370#define SFY(x) #x
371
372#define DATA_STACK_ALLOC(state, type, ptr) \
373do { \
374 alloc_pos = state->data_stack_base; \
375 TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
376 "(%" PY_FORMAT_SIZE_T "d)\n", \
377 SFY(type), alloc_pos, sizeof(type))); \
378 if (sizeof(type) > state->data_stack_size - alloc_pos) { \
379 int j = data_stack_grow(state, sizeof(type)); \
380 if (j < 0) return j; \
381 if (ctx_pos != -1) \
382 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
383 } \
384 ptr = (type*)(state->data_stack+alloc_pos); \
385 state->data_stack_base += sizeof(type); \
386} while (0)
387
388#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
389do { \
390 TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", SFY(type), pos)); \
391 ptr = (type*)(state->data_stack+pos); \
392} while (0)
393
394#define DATA_STACK_PUSH(state, data, size) \
395do { \
396 TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
397 "(%" PY_FORMAT_SIZE_T "d)\n", \
398 data, state->data_stack_base, size)); \
399 if (size > state->data_stack_size - state->data_stack_base) { \
400 int j = data_stack_grow(state, size); \
401 if (j < 0) return j; \
402 if (ctx_pos != -1) \
403 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
404 } \
405 memcpy(state->data_stack+state->data_stack_base, data, size); \
406 state->data_stack_base += size; \
407} while (0)
408
409#define DATA_STACK_POP(state, data, size, discard) \
410do { \
411 TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
412 "(%" PY_FORMAT_SIZE_T "d)\n", \
413 data, state->data_stack_base-size, size)); \
414 memcpy(data, state->data_stack+state->data_stack_base-size, size); \
415 if (discard) \
416 state->data_stack_base -= size; \
417} while (0)
418
419#define DATA_STACK_POP_DISCARD(state, size) \
420do { \
421 TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
422 "(%" PY_FORMAT_SIZE_T "d)\n", \
423 state->data_stack_base-size, size)); \
424 state->data_stack_base -= size; \
425} while(0)
426
427#define DATA_PUSH(x) \
428 DATA_STACK_PUSH(state, (x), sizeof(*(x)))
429#define DATA_POP(x) \
430 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
431#define DATA_POP_DISCARD(x) \
432 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
433#define DATA_ALLOC(t,p) \
434 DATA_STACK_ALLOC(state, t, p)
435#define DATA_LOOKUP_AT(t,p,pos) \
436 DATA_STACK_LOOKUP_AT(state,t,p,pos)
437
438#define MARK_PUSH(lastmark) \
439 do if (lastmark > 0) { \
440 i = lastmark; /* ctx->lastmark may change if reallocated */ \
441 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
442 } while (0)
443#define MARK_POP(lastmark) \
444 do if (lastmark > 0) { \
445 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
446 } while (0)
447#define MARK_POP_KEEP(lastmark) \
448 do if (lastmark > 0) { \
449 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
450 } while (0)
451#define MARK_POP_DISCARD(lastmark) \
452 do if (lastmark > 0) { \
453 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
454 } while (0)
455
456#define JUMP_NONE 0
457#define JUMP_MAX_UNTIL_1 1
458#define JUMP_MAX_UNTIL_2 2
459#define JUMP_MAX_UNTIL_3 3
460#define JUMP_MIN_UNTIL_1 4
461#define JUMP_MIN_UNTIL_2 5
462#define JUMP_MIN_UNTIL_3 6
463#define JUMP_REPEAT 7
464#define JUMP_REPEAT_ONE_1 8
465#define JUMP_REPEAT_ONE_2 9
466#define JUMP_MIN_REPEAT_ONE 10
467#define JUMP_BRANCH 11
468#define JUMP_ASSERT 12
469#define JUMP_ASSERT_NOT 13
470
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200471#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) \
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300472 DATA_ALLOC(SRE(match_context), nextctx); \
473 nextctx->last_ctx_pos = ctx_pos; \
474 nextctx->jump = jumpvalue; \
475 nextctx->pattern = nextpattern; \
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200476 nextctx->match_all = matchall; \
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300477 ctx_pos = alloc_pos; \
478 ctx = nextctx; \
479 goto entrance; \
480 jumplabel: \
481 while (0) /* gcc doesn't like labels at end of scopes */ \
482
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200483#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
484 DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->match_all)
485
486#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
487 DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
488
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300489typedef struct {
490 Py_ssize_t last_ctx_pos;
491 Py_ssize_t jump;
492 SRE_CHAR* ptr;
493 SRE_CODE* pattern;
494 Py_ssize_t count;
495 Py_ssize_t lastmark;
496 Py_ssize_t lastindex;
497 union {
498 SRE_CODE chr;
499 SRE_REPEAT* rep;
500 } u;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200501 int match_all;
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300502} SRE(match_context);
503
504/* check if string matches the given pattern. returns <0 for
505 error, 0 for failure, and 1 for success */
506LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300507SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300508{
509 SRE_CHAR* end = (SRE_CHAR *)state->end;
510 Py_ssize_t alloc_pos, ctx_pos = -1;
511 Py_ssize_t i, ret = 0;
512 Py_ssize_t jump;
513 unsigned int sigcount=0;
514
515 SRE(match_context)* ctx;
516 SRE(match_context)* nextctx;
517
518 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
519
520 DATA_ALLOC(SRE(match_context), ctx);
521 ctx->last_ctx_pos = -1;
522 ctx->jump = JUMP_NONE;
523 ctx->pattern = pattern;
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300524 ctx->match_all = match_all;
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300525 ctx_pos = alloc_pos;
526
527entrance:
528
529 ctx->ptr = (SRE_CHAR *)state->ptr;
530
531 if (ctx->pattern[0] == SRE_OP_INFO) {
532 /* optimization info block */
533 /* <INFO> <1=skip> <2=flags> <3=min> ... */
534 if (ctx->pattern[3] && (Py_uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
535 TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
536 "need %" PY_FORMAT_SIZE_T "d)\n",
537 end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
538 RETURN_FAILURE;
539 }
540 ctx->pattern += ctx->pattern[1] + 1;
541 }
542
543 for (;;) {
544 ++sigcount;
545 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
546 RETURN_ERROR(SRE_ERROR_INTERRUPTED);
547
548 switch (*ctx->pattern++) {
549
550 case SRE_OP_MARK:
551 /* set mark */
552 /* <MARK> <gid> */
553 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
554 ctx->ptr, ctx->pattern[0]));
555 i = ctx->pattern[0];
556 if (i & 1)
557 state->lastindex = i/2 + 1;
558 if (i > state->lastmark) {
559 /* state->lastmark is the highest valid index in the
560 state->mark array. If it is increased by more than 1,
561 the intervening marks must be set to NULL to signal
562 that these marks have not been encountered. */
563 Py_ssize_t j = state->lastmark + 1;
564 while (j < i)
565 state->mark[j++] = NULL;
566 state->lastmark = i;
567 }
568 state->mark[i] = ctx->ptr;
569 ctx->pattern++;
570 break;
571
572 case SRE_OP_LITERAL:
573 /* match literal string */
574 /* <LITERAL> <code> */
575 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
576 ctx->ptr, *ctx->pattern));
577 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
578 RETURN_FAILURE;
579 ctx->pattern++;
580 ctx->ptr++;
581 break;
582
583 case SRE_OP_NOT_LITERAL:
584 /* match anything that is not literal character */
585 /* <NOT_LITERAL> <code> */
586 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
587 ctx->ptr, *ctx->pattern));
588 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
589 RETURN_FAILURE;
590 ctx->pattern++;
591 ctx->ptr++;
592 break;
593
594 case SRE_OP_SUCCESS:
595 /* end of pattern */
596 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200597 if (!ctx->match_all || ctx->ptr == state->end) {
598 state->ptr = ctx->ptr;
599 RETURN_SUCCESS;
600 }
601 RETURN_FAILURE;
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300602
603 case SRE_OP_AT:
604 /* match at given position */
605 /* <AT> <code> */
606 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
607 if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
608 RETURN_FAILURE;
609 ctx->pattern++;
610 break;
611
612 case SRE_OP_CATEGORY:
613 /* match at given category */
614 /* <CATEGORY> <code> */
615 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
616 ctx->ptr, *ctx->pattern));
617 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
618 RETURN_FAILURE;
619 ctx->pattern++;
620 ctx->ptr++;
621 break;
622
623 case SRE_OP_ANY:
624 /* match anything (except a newline) */
625 /* <ANY> */
626 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
627 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
628 RETURN_FAILURE;
629 ctx->ptr++;
630 break;
631
632 case SRE_OP_ANY_ALL:
633 /* match anything */
634 /* <ANY_ALL> */
635 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
636 if (ctx->ptr >= end)
637 RETURN_FAILURE;
638 ctx->ptr++;
639 break;
640
641 case SRE_OP_IN:
642 /* match set member (or non_member) */
643 /* <IN> <skip> <set> */
644 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200645 if (ctx->ptr >= end ||
646 !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr))
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300647 RETURN_FAILURE;
648 ctx->pattern += ctx->pattern[0];
649 ctx->ptr++;
650 break;
651
652 case SRE_OP_LITERAL_IGNORE:
653 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
654 ctx->pattern, ctx->ptr, ctx->pattern[0]));
655 if (ctx->ptr >= end ||
656 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
657 RETURN_FAILURE;
658 ctx->pattern++;
659 ctx->ptr++;
660 break;
661
662 case SRE_OP_NOT_LITERAL_IGNORE:
663 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
664 ctx->pattern, ctx->ptr, *ctx->pattern));
665 if (ctx->ptr >= end ||
666 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
667 RETURN_FAILURE;
668 ctx->pattern++;
669 ctx->ptr++;
670 break;
671
672 case SRE_OP_IN_IGNORE:
673 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
674 if (ctx->ptr >= end
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200675 || !SRE(charset)(state, ctx->pattern+1,
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300676 (SRE_CODE)state->lower(*ctx->ptr)))
677 RETURN_FAILURE;
678 ctx->pattern += ctx->pattern[0];
679 ctx->ptr++;
680 break;
681
682 case SRE_OP_JUMP:
683 case SRE_OP_INFO:
684 /* jump forward */
685 /* <JUMP> <offset> */
686 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
687 ctx->ptr, ctx->pattern[0]));
688 ctx->pattern += ctx->pattern[0];
689 break;
690
691 case SRE_OP_BRANCH:
692 /* alternation */
693 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
694 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
695 LASTMARK_SAVE();
696 ctx->u.rep = state->repeat;
697 if (ctx->u.rep)
698 MARK_PUSH(ctx->lastmark);
699 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
700 if (ctx->pattern[1] == SRE_OP_LITERAL &&
701 (ctx->ptr >= end ||
702 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
703 continue;
704 if (ctx->pattern[1] == SRE_OP_IN &&
705 (ctx->ptr >= end ||
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200706 !SRE(charset)(state, ctx->pattern + 3,
707 (SRE_CODE) *ctx->ptr)))
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300708 continue;
709 state->ptr = ctx->ptr;
710 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
711 if (ret) {
712 if (ctx->u.rep)
713 MARK_POP_DISCARD(ctx->lastmark);
714 RETURN_ON_ERROR(ret);
715 RETURN_SUCCESS;
716 }
717 if (ctx->u.rep)
718 MARK_POP_KEEP(ctx->lastmark);
719 LASTMARK_RESTORE();
720 }
721 if (ctx->u.rep)
722 MARK_POP_DISCARD(ctx->lastmark);
723 RETURN_FAILURE;
724
725 case SRE_OP_REPEAT_ONE:
726 /* match repeated sequence (maximizing regexp) */
727
728 /* this operator only works if the repeated item is
729 exactly one character wide, and we're not already
730 collecting backtracking points. for other cases,
731 use the MAX_REPEAT operator */
732
733 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
734
735 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
736 ctx->pattern[1], ctx->pattern[2]));
737
738 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
739 RETURN_FAILURE; /* cannot match */
740
741 state->ptr = ctx->ptr;
742
743 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
744 RETURN_ON_ERROR(ret);
745 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
746 ctx->count = ret;
747 ctx->ptr += ctx->count;
748
749 /* when we arrive here, count contains the number of
750 matches, and ctx->ptr points to the tail of the target
751 string. check if the rest of the pattern matches,
752 and backtrack if not. */
753
754 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
755 RETURN_FAILURE;
756
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200757 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300758 ctx->ptr == state->end) {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300759 /* tail is empty. we're finished */
760 state->ptr = ctx->ptr;
761 RETURN_SUCCESS;
762 }
763
764 LASTMARK_SAVE();
765
766 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
767 /* tail starts with a literal. skip positions where
768 the rest of the pattern cannot possibly match */
769 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
770 for (;;) {
771 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
772 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
773 ctx->ptr--;
774 ctx->count--;
775 }
776 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
777 break;
778 state->ptr = ctx->ptr;
779 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
780 ctx->pattern+ctx->pattern[0]);
781 if (ret) {
782 RETURN_ON_ERROR(ret);
783 RETURN_SUCCESS;
784 }
785
786 LASTMARK_RESTORE();
787
788 ctx->ptr--;
789 ctx->count--;
790 }
791
792 } else {
793 /* general case */
794 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
795 state->ptr = ctx->ptr;
796 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
797 ctx->pattern+ctx->pattern[0]);
798 if (ret) {
799 RETURN_ON_ERROR(ret);
800 RETURN_SUCCESS;
801 }
802 ctx->ptr--;
803 ctx->count--;
804 LASTMARK_RESTORE();
805 }
806 }
807 RETURN_FAILURE;
808
809 case SRE_OP_MIN_REPEAT_ONE:
810 /* match repeated sequence (minimizing regexp) */
811
812 /* this operator only works if the repeated item is
813 exactly one character wide, and we're not already
814 collecting backtracking points. for other cases,
815 use the MIN_REPEAT operator */
816
817 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
818
819 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
820 ctx->pattern[1], ctx->pattern[2]));
821
822 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
823 RETURN_FAILURE; /* cannot match */
824
825 state->ptr = ctx->ptr;
826
827 if (ctx->pattern[1] == 0)
828 ctx->count = 0;
829 else {
830 /* count using pattern min as the maximum */
831 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
832 RETURN_ON_ERROR(ret);
833 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
834 if (ret < (Py_ssize_t) ctx->pattern[1])
835 /* didn't match minimum number of times */
836 RETURN_FAILURE;
837 /* advance past minimum matches of repeat */
838 ctx->count = ret;
839 ctx->ptr += ctx->count;
840 }
841
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200842 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300843 (!match_all || ctx->ptr == state->end)) {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300844 /* tail is empty. we're finished */
845 state->ptr = ctx->ptr;
846 RETURN_SUCCESS;
847
848 } else {
849 /* general case */
850 LASTMARK_SAVE();
851 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
852 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
853 state->ptr = ctx->ptr;
854 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
855 ctx->pattern+ctx->pattern[0]);
856 if (ret) {
857 RETURN_ON_ERROR(ret);
858 RETURN_SUCCESS;
859 }
860 state->ptr = ctx->ptr;
861 ret = SRE(count)(state, ctx->pattern+3, 1);
862 RETURN_ON_ERROR(ret);
863 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
864 if (ret == 0)
865 break;
866 assert(ret == 1);
867 ctx->ptr++;
868 ctx->count++;
869 LASTMARK_RESTORE();
870 }
871 }
872 RETURN_FAILURE;
873
874 case SRE_OP_REPEAT:
875 /* create repeat context. all the hard work is done
876 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
877 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
878 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
879 ctx->pattern[1], ctx->pattern[2]));
880
881 /* install new repeat context */
882 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
883 if (!ctx->u.rep) {
884 PyErr_NoMemory();
885 RETURN_FAILURE;
886 }
887 ctx->u.rep->count = -1;
888 ctx->u.rep->pattern = ctx->pattern;
889 ctx->u.rep->prev = state->repeat;
890 ctx->u.rep->last_ptr = NULL;
891 state->repeat = ctx->u.rep;
892
893 state->ptr = ctx->ptr;
894 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
895 state->repeat = ctx->u.rep->prev;
896 PyObject_FREE(ctx->u.rep);
897
898 if (ret) {
899 RETURN_ON_ERROR(ret);
900 RETURN_SUCCESS;
901 }
902 RETURN_FAILURE;
903
904 case SRE_OP_MAX_UNTIL:
905 /* maximizing repeat */
906 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
907
908 /* FIXME: we probably need to deal with zero-width
909 matches in here... */
910
911 ctx->u.rep = state->repeat;
912 if (!ctx->u.rep)
913 RETURN_ERROR(SRE_ERROR_STATE);
914
915 state->ptr = ctx->ptr;
916
917 ctx->count = ctx->u.rep->count+1;
918
919 TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
920 ctx->ptr, ctx->count));
921
922 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
923 /* not enough matches */
924 ctx->u.rep->count = ctx->count;
925 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
926 ctx->u.rep->pattern+3);
927 if (ret) {
928 RETURN_ON_ERROR(ret);
929 RETURN_SUCCESS;
930 }
931 ctx->u.rep->count = ctx->count-1;
932 state->ptr = ctx->ptr;
933 RETURN_FAILURE;
934 }
935
936 if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
937 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
938 state->ptr != ctx->u.rep->last_ptr) {
939 /* we may have enough matches, but if we can
940 match another item, do so */
941 ctx->u.rep->count = ctx->count;
942 LASTMARK_SAVE();
943 MARK_PUSH(ctx->lastmark);
944 /* zero-width match protection */
945 DATA_PUSH(&ctx->u.rep->last_ptr);
946 ctx->u.rep->last_ptr = state->ptr;
947 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
948 ctx->u.rep->pattern+3);
949 DATA_POP(&ctx->u.rep->last_ptr);
950 if (ret) {
951 MARK_POP_DISCARD(ctx->lastmark);
952 RETURN_ON_ERROR(ret);
953 RETURN_SUCCESS;
954 }
955 MARK_POP(ctx->lastmark);
956 LASTMARK_RESTORE();
957 ctx->u.rep->count = ctx->count-1;
958 state->ptr = ctx->ptr;
959 }
960
961 /* cannot match more repeated items here. make sure the
962 tail matches */
963 state->repeat = ctx->u.rep->prev;
964 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
965 RETURN_ON_SUCCESS(ret);
966 state->repeat = ctx->u.rep;
967 state->ptr = ctx->ptr;
968 RETURN_FAILURE;
969
970 case SRE_OP_MIN_UNTIL:
971 /* minimizing repeat */
972 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
973
974 ctx->u.rep = state->repeat;
975 if (!ctx->u.rep)
976 RETURN_ERROR(SRE_ERROR_STATE);
977
978 state->ptr = ctx->ptr;
979
980 ctx->count = ctx->u.rep->count+1;
981
982 TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
983 ctx->ptr, ctx->count, ctx->u.rep->pattern));
984
985 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
986 /* not enough matches */
987 ctx->u.rep->count = ctx->count;
988 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
989 ctx->u.rep->pattern+3);
990 if (ret) {
991 RETURN_ON_ERROR(ret);
992 RETURN_SUCCESS;
993 }
994 ctx->u.rep->count = ctx->count-1;
995 state->ptr = ctx->ptr;
996 RETURN_FAILURE;
997 }
998
999 LASTMARK_SAVE();
1000
1001 /* see if the tail matches */
1002 state->repeat = ctx->u.rep->prev;
1003 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1004 if (ret) {
1005 RETURN_ON_ERROR(ret);
1006 RETURN_SUCCESS;
1007 }
1008
1009 state->repeat = ctx->u.rep;
1010 state->ptr = ctx->ptr;
1011
1012 LASTMARK_RESTORE();
1013
1014 if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1015 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1016 state->ptr == ctx->u.rep->last_ptr)
1017 RETURN_FAILURE;
1018
1019 ctx->u.rep->count = ctx->count;
1020 /* zero-width match protection */
1021 DATA_PUSH(&ctx->u.rep->last_ptr);
1022 ctx->u.rep->last_ptr = state->ptr;
1023 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1024 ctx->u.rep->pattern+3);
1025 DATA_POP(&ctx->u.rep->last_ptr);
1026 if (ret) {
1027 RETURN_ON_ERROR(ret);
1028 RETURN_SUCCESS;
1029 }
1030 ctx->u.rep->count = ctx->count-1;
1031 state->ptr = ctx->ptr;
1032 RETURN_FAILURE;
1033
1034 case SRE_OP_GROUPREF:
1035 /* match backreference */
1036 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1037 ctx->ptr, ctx->pattern[0]));
1038 i = ctx->pattern[0];
1039 {
1040 Py_ssize_t groupref = i+i;
1041 if (groupref >= state->lastmark) {
1042 RETURN_FAILURE;
1043 } else {
1044 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1045 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1046 if (!p || !e || e < p)
1047 RETURN_FAILURE;
1048 while (p < e) {
1049 if (ctx->ptr >= end || *ctx->ptr != *p)
1050 RETURN_FAILURE;
1051 p++;
1052 ctx->ptr++;
1053 }
1054 }
1055 }
1056 ctx->pattern++;
1057 break;
1058
1059 case SRE_OP_GROUPREF_IGNORE:
1060 /* match backreference */
1061 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1062 ctx->ptr, ctx->pattern[0]));
1063 i = ctx->pattern[0];
1064 {
1065 Py_ssize_t groupref = i+i;
1066 if (groupref >= state->lastmark) {
1067 RETURN_FAILURE;
1068 } else {
1069 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1070 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1071 if (!p || !e || e < p)
1072 RETURN_FAILURE;
1073 while (p < e) {
1074 if (ctx->ptr >= end ||
1075 state->lower(*ctx->ptr) != state->lower(*p))
1076 RETURN_FAILURE;
1077 p++;
1078 ctx->ptr++;
1079 }
1080 }
1081 }
1082 ctx->pattern++;
1083 break;
1084
1085 case SRE_OP_GROUPREF_EXISTS:
1086 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1087 ctx->ptr, ctx->pattern[0]));
1088 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1089 i = ctx->pattern[0];
1090 {
1091 Py_ssize_t groupref = i+i;
1092 if (groupref >= state->lastmark) {
1093 ctx->pattern += ctx->pattern[1];
1094 break;
1095 } else {
1096 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1097 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1098 if (!p || !e || e < p) {
1099 ctx->pattern += ctx->pattern[1];
1100 break;
1101 }
1102 }
1103 }
1104 ctx->pattern += 2;
1105 break;
1106
1107 case SRE_OP_ASSERT:
1108 /* assert subpattern */
1109 /* <ASSERT> <skip> <back> <pattern> */
1110 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1111 ctx->ptr, ctx->pattern[1]));
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001112 if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001113 RETURN_FAILURE;
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001114 state->ptr = ctx->ptr - ctx->pattern[1];
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001115 DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001116 RETURN_ON_FAILURE(ret);
1117 ctx->pattern += ctx->pattern[0];
1118 break;
1119
1120 case SRE_OP_ASSERT_NOT:
1121 /* assert not subpattern */
1122 /* <ASSERT_NOT> <skip> <back> <pattern> */
1123 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1124 ctx->ptr, ctx->pattern[1]));
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001125 if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
1126 state->ptr = ctx->ptr - ctx->pattern[1];
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001127 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001128 if (ret) {
1129 RETURN_ON_ERROR(ret);
1130 RETURN_FAILURE;
1131 }
1132 }
1133 ctx->pattern += ctx->pattern[0];
1134 break;
1135
1136 case SRE_OP_FAILURE:
1137 /* immediate failure */
1138 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1139 RETURN_FAILURE;
1140
1141 default:
1142 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1143 ctx->pattern[-1]));
1144 RETURN_ERROR(SRE_ERROR_ILLEGAL);
1145 }
1146 }
1147
1148exit:
1149 ctx_pos = ctx->last_ctx_pos;
1150 jump = ctx->jump;
1151 DATA_POP_DISCARD(ctx);
1152 if (ctx_pos == -1)
1153 return ret;
1154 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1155
1156 switch (jump) {
1157 case JUMP_MAX_UNTIL_2:
1158 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1159 goto jump_max_until_2;
1160 case JUMP_MAX_UNTIL_3:
1161 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1162 goto jump_max_until_3;
1163 case JUMP_MIN_UNTIL_2:
1164 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1165 goto jump_min_until_2;
1166 case JUMP_MIN_UNTIL_3:
1167 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1168 goto jump_min_until_3;
1169 case JUMP_BRANCH:
1170 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1171 goto jump_branch;
1172 case JUMP_MAX_UNTIL_1:
1173 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1174 goto jump_max_until_1;
1175 case JUMP_MIN_UNTIL_1:
1176 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1177 goto jump_min_until_1;
1178 case JUMP_REPEAT:
1179 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1180 goto jump_repeat;
1181 case JUMP_REPEAT_ONE_1:
1182 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1183 goto jump_repeat_one_1;
1184 case JUMP_REPEAT_ONE_2:
1185 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1186 goto jump_repeat_one_2;
1187 case JUMP_MIN_REPEAT_ONE:
1188 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1189 goto jump_min_repeat_one;
1190 case JUMP_ASSERT:
1191 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1192 goto jump_assert;
1193 case JUMP_ASSERT_NOT:
1194 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1195 goto jump_assert_not;
1196 case JUMP_NONE:
1197 TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1198 ctx->ptr, ret));
1199 break;
1200 }
1201
1202 return ret; /* should never get here */
1203}
1204
1205LOCAL(Py_ssize_t)
1206SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1207{
1208 SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1209 SRE_CHAR* end = (SRE_CHAR *)state->end;
1210 Py_ssize_t status = 0;
1211 Py_ssize_t prefix_len = 0;
1212 Py_ssize_t prefix_skip = 0;
1213 SRE_CODE* prefix = NULL;
1214 SRE_CODE* charset = NULL;
1215 SRE_CODE* overlap = NULL;
1216 int flags = 0;
1217
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001218 if (ptr > end)
1219 return 0;
1220
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001221 if (pattern[0] == SRE_OP_INFO) {
1222 /* optimization info block */
1223 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1224
1225 flags = pattern[2];
1226
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001227 if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1228 TRACE(("reject (got %u chars, need %u)\n",
1229 (unsigned int)(end - ptr), pattern[3]));
1230 return 0;
1231 }
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001232 if (pattern[3] > 1) {
1233 /* adjust end point (but make sure we leave at least one
1234 character in there, so literal search will work) */
1235 end -= pattern[3] - 1;
1236 if (end <= ptr)
1237 end = ptr;
1238 }
1239
1240 if (flags & SRE_INFO_PREFIX) {
1241 /* pattern starts with a known prefix */
1242 /* <length> <skip> <prefix data> <overlap data> */
1243 prefix_len = pattern[5];
1244 prefix_skip = pattern[6];
1245 prefix = pattern + 7;
1246 overlap = prefix + prefix_len - 1;
1247 } else if (flags & SRE_INFO_CHARSET)
1248 /* pattern starts with a character from a known set */
1249 /* <charset> */
1250 charset = pattern + 5;
1251
1252 pattern += 1 + pattern[1];
1253 }
1254
1255 TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1256 prefix, prefix_len, prefix_skip));
1257 TRACE(("charset = %p\n", charset));
1258
1259#if defined(USE_FAST_SEARCH)
1260 if (prefix_len > 1) {
1261 /* pattern starts with a known prefix. use the overlap
1262 table to skip forward as fast as we possibly can */
1263 Py_ssize_t i = 0;
1264
1265 end = (SRE_CHAR *)state->end;
1266 if (prefix_len > end - ptr)
1267 return 0;
1268#if SIZEOF_SRE_CHAR < 4
1269 for (i = 0; i < prefix_len; i++)
1270 if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1271 return 0; /* literal can't match: doesn't fit in char width */
1272#endif
1273 while (ptr < end) {
1274 SRE_CHAR c = (SRE_CHAR) prefix[0];
1275 while (*ptr++ != c) {
1276 if (ptr >= end)
1277 return 0;
1278 }
1279 if (ptr >= end)
1280 return 0;
1281
1282 i = 1;
1283 do {
1284 if (*ptr == (SRE_CHAR) prefix[i]) {
1285 if (++i != prefix_len) {
1286 if (++ptr >= end)
1287 return 0;
1288 continue;
1289 }
1290 /* found a potential match */
1291 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1292 state->start = ptr - (prefix_len - 1);
1293 state->ptr = ptr - (prefix_len - prefix_skip - 1);
1294 if (flags & SRE_INFO_LITERAL)
1295 return 1; /* we got all of it */
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001296 status = SRE(match)(state, pattern + 2*prefix_skip, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001297 if (status != 0)
1298 return status;
1299 /* close but no cigar -- try again */
1300 if (++ptr >= end)
1301 return 0;
1302 }
1303 i = overlap[i];
1304 } while (i != 0);
1305 }
1306 return 0;
1307 }
1308#endif
1309
1310 if (pattern[0] == SRE_OP_LITERAL) {
1311 /* pattern starts with a literal character. this is used
1312 for short prefixes, and if fast search is disabled */
1313 SRE_CHAR c = (SRE_CHAR) pattern[1];
1314#if SIZEOF_SRE_CHAR < 4
1315 if ((SRE_CODE) c != pattern[1])
1316 return 0; /* literal can't match: doesn't fit in char width */
1317#endif
1318 end = (SRE_CHAR *)state->end;
1319 while (ptr < end) {
1320 while (*ptr != c) {
1321 if (++ptr >= end)
1322 return 0;
1323 }
1324 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1325 state->start = ptr;
1326 state->ptr = ++ptr;
1327 if (flags & SRE_INFO_LITERAL)
1328 return 1; /* we got all of it */
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001329 status = SRE(match)(state, pattern + 2, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001330 if (status != 0)
1331 break;
1332 }
1333 } else if (charset) {
1334 /* pattern starts with a character from a known set */
1335 end = (SRE_CHAR *)state->end;
1336 for (;;) {
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001337 while (ptr < end && !SRE(charset)(state, charset, *ptr))
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001338 ptr++;
1339 if (ptr >= end)
1340 return 0;
1341 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1342 state->start = ptr;
1343 state->ptr = ptr;
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001344 status = SRE(match)(state, pattern, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001345 if (status != 0)
1346 break;
1347 ptr++;
1348 }
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001349 } else {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001350 /* general case */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001351 assert(ptr <= end);
1352 while (1) {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001353 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001354 state->start = state->ptr = ptr;
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001355 status = SRE(match)(state, pattern, 0);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001356 if (status != 0 || ptr >= end)
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001357 break;
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001358 ptr++;
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001359 }
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001360 }
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001361
1362 return status;
1363}
1364
1365#undef SRE_CHAR
1366#undef SIZEOF_SRE_CHAR
1367#undef SRE
1368
1369/* vim:ts=4:sw=4:et
1370*/