blob: 5c6c5a559e6fa73a49ce04b9798f8164b3277e03 [file] [log] [blame]
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001/*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
7 *
8 * See the _sre.c file for information on usage and redistribution.
9 */
10
11/* String matching engine */
12
13/* This file is included three times, with different character settings */
14
15LOCAL(int)
16SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
17{
18 /* check if pointer is at given position */
19
20 Py_ssize_t thisp, thatp;
21
22 switch (at) {
23
24 case SRE_AT_BEGINNING:
25 case SRE_AT_BEGINNING_STRING:
26 return ((void*) ptr == state->beginning);
27
28 case SRE_AT_BEGINNING_LINE:
29 return ((void*) ptr == state->beginning ||
30 SRE_IS_LINEBREAK((int) ptr[-1]));
31
32 case SRE_AT_END:
33 return (((void*) (ptr+1) == state->end &&
34 SRE_IS_LINEBREAK((int) ptr[0])) ||
35 ((void*) ptr == state->end));
36
37 case SRE_AT_END_LINE:
38 return ((void*) ptr == state->end ||
39 SRE_IS_LINEBREAK((int) ptr[0]));
40
41 case SRE_AT_END_STRING:
42 return ((void*) ptr == state->end);
43
44 case SRE_AT_BOUNDARY:
45 if (state->beginning == state->end)
46 return 0;
47 thatp = ((void*) ptr > state->beginning) ?
48 SRE_IS_WORD((int) ptr[-1]) : 0;
49 thisp = ((void*) ptr < state->end) ?
50 SRE_IS_WORD((int) ptr[0]) : 0;
51 return thisp != thatp;
52
53 case SRE_AT_NON_BOUNDARY:
54 if (state->beginning == state->end)
55 return 0;
56 thatp = ((void*) ptr > state->beginning) ?
57 SRE_IS_WORD((int) ptr[-1]) : 0;
58 thisp = ((void*) ptr < state->end) ?
59 SRE_IS_WORD((int) ptr[0]) : 0;
60 return thisp == thatp;
61
62 case SRE_AT_LOC_BOUNDARY:
63 if (state->beginning == state->end)
64 return 0;
65 thatp = ((void*) ptr > state->beginning) ?
66 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67 thisp = ((void*) ptr < state->end) ?
68 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69 return thisp != thatp;
70
71 case SRE_AT_LOC_NON_BOUNDARY:
72 if (state->beginning == state->end)
73 return 0;
74 thatp = ((void*) ptr > state->beginning) ?
75 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76 thisp = ((void*) ptr < state->end) ?
77 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78 return thisp == thatp;
79
80 case SRE_AT_UNI_BOUNDARY:
81 if (state->beginning == state->end)
82 return 0;
83 thatp = ((void*) ptr > state->beginning) ?
84 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85 thisp = ((void*) ptr < state->end) ?
86 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87 return thisp != thatp;
88
89 case SRE_AT_UNI_NON_BOUNDARY:
90 if (state->beginning == state->end)
91 return 0;
92 thatp = ((void*) ptr > state->beginning) ?
93 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94 thisp = ((void*) ptr < state->end) ?
95 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96 return thisp == thatp;
97
98 }
99
100 return 0;
101}
102
103LOCAL(int)
104SRE(charset)(SRE_CODE* set, SRE_CODE ch)
105{
106 /* check if character is a member of the given set */
107
108 int ok = 1;
109
110 for (;;) {
111 switch (*set++) {
112
113 case SRE_OP_FAILURE:
114 return !ok;
115
116 case SRE_OP_LITERAL:
117 /* <LITERAL> <code> */
118 if (ch == set[0])
119 return ok;
120 set++;
121 break;
122
123 case SRE_OP_CATEGORY:
124 /* <CATEGORY> <code> */
125 if (sre_category(set[0], (int) ch))
126 return ok;
127 set++;
128 break;
129
130 case SRE_OP_CHARSET:
131 /* <CHARSET> <bitmap> */
132 if (ch < 256 &&
133 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134 return ok;
135 set += 256/SRE_CODE_BITS;
136 break;
137
138 case SRE_OP_RANGE:
139 /* <RANGE> <lower> <upper> */
140 if (set[0] <= ch && ch <= set[1])
141 return ok;
142 set += 2;
143 break;
144
145 case SRE_OP_NEGATE:
146 ok = !ok;
147 break;
148
149 case SRE_OP_BIGCHARSET:
150 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
151 {
152 Py_ssize_t count, block;
153 count = *(set++);
154
155 if (ch < 0x10000u)
156 block = ((unsigned char*)set)[ch >> 8];
157 else
158 block = -1;
159 set += 256/sizeof(SRE_CODE);
160 if (block >=0 &&
161 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
162 (1u << (ch & (SRE_CODE_BITS-1)))))
163 return ok;
164 set += count * (256/SRE_CODE_BITS);
165 break;
166 }
167
168 default:
169 /* internal error -- there's not much we can do about it
170 here, so let's just pretend it didn't match... */
171 return 0;
172 }
173 }
174}
175
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300176LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300177
178LOCAL(Py_ssize_t)
179SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
180{
181 SRE_CODE chr;
182 SRE_CHAR c;
183 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
184 SRE_CHAR* end = (SRE_CHAR *)state->end;
185 Py_ssize_t i;
186
187 /* adjust end */
188 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
189 end = ptr + maxcount;
190
191 switch (pattern[0]) {
192
193 case SRE_OP_IN:
194 /* repeated set */
195 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
196 while (ptr < end && SRE(charset)(pattern + 2, *ptr))
197 ptr++;
198 break;
199
200 case SRE_OP_ANY:
201 /* repeated dot wildcard. */
202 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
203 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
204 ptr++;
205 break;
206
207 case SRE_OP_ANY_ALL:
208 /* repeated dot wildcard. skip to the end of the target
209 string, and backtrack from there */
210 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
211 ptr = end;
212 break;
213
214 case SRE_OP_LITERAL:
215 /* repeated literal */
216 chr = pattern[1];
217 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
218 c = (SRE_CHAR) chr;
219#if SIZEOF_SRE_CHAR < 4
220 if ((SRE_CODE) c != chr)
221 ; /* literal can't match: doesn't fit in char width */
222 else
223#endif
224 while (ptr < end && *ptr == c)
225 ptr++;
226 break;
227
228 case SRE_OP_LITERAL_IGNORE:
229 /* repeated literal */
230 chr = pattern[1];
231 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
232 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
233 ptr++;
234 break;
235
236 case SRE_OP_NOT_LITERAL:
237 /* repeated non-literal */
238 chr = pattern[1];
239 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
240 c = (SRE_CHAR) chr;
241#if SIZEOF_SRE_CHAR < 4
242 if ((SRE_CODE) c != chr)
243 ptr = end; /* literal can't match: doesn't fit in char width */
244 else
245#endif
246 while (ptr < end && *ptr != c)
247 ptr++;
248 break;
249
250 case SRE_OP_NOT_LITERAL_IGNORE:
251 /* repeated non-literal */
252 chr = pattern[1];
253 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
254 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
255 ptr++;
256 break;
257
258 default:
259 /* repeated single character pattern */
260 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
261 while ((SRE_CHAR*) state->ptr < end) {
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300262 i = SRE(match)(state, pattern, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300263 if (i < 0)
264 return i;
265 if (!i)
266 break;
267 }
268 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
269 (SRE_CHAR*) state->ptr - ptr));
270 return (SRE_CHAR*) state->ptr - ptr;
271 }
272
273 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
274 ptr - (SRE_CHAR*) state->ptr));
275 return ptr - (SRE_CHAR*) state->ptr;
276}
277
278#if 0 /* not used in this release */
279LOCAL(int)
280SRE(info)(SRE_STATE* state, SRE_CODE* pattern)
281{
282 /* check if an SRE_OP_INFO block matches at the current position.
283 returns the number of SRE_CODE objects to skip if successful, 0
284 if no match */
285
286 SRE_CHAR* end = (SRE_CHAR*) state->end;
287 SRE_CHAR* ptr = (SRE_CHAR*) state->ptr;
288 Py_ssize_t i;
289
290 /* check minimal length */
291 if (pattern[3] && end - ptr < pattern[3])
292 return 0;
293
294 /* check known prefix */
295 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
296 /* <length> <skip> <prefix data> <overlap data> */
297 for (i = 0; i < pattern[5]; i++)
298 if ((SRE_CODE) ptr[i] != pattern[7 + i])
299 return 0;
300 return pattern[0] + 2 * pattern[6];
301 }
302 return pattern[0];
303}
304#endif
305
306/* The macros below should be used to protect recursive SRE(match)()
307 * calls that *failed* and do *not* return immediately (IOW, those
308 * that will backtrack). Explaining:
309 *
310 * - Recursive SRE(match)() returned true: that's usually a success
311 * (besides atypical cases like ASSERT_NOT), therefore there's no
312 * reason to restore lastmark;
313 *
314 * - Recursive SRE(match)() returned false but the current SRE(match)()
315 * is returning to the caller: If the current SRE(match)() is the
316 * top function of the recursion, returning false will be a matching
317 * failure, and it doesn't matter where lastmark is pointing to.
318 * If it's *not* the top function, it will be a recursive SRE(match)()
319 * failure by itself, and the calling SRE(match)() will have to deal
320 * with the failure by the same rules explained here (it will restore
321 * lastmark by itself if necessary);
322 *
323 * - Recursive SRE(match)() returned false, and will continue the
324 * outside 'for' loop: must be protected when breaking, since the next
325 * OP could potentially depend on lastmark;
326 *
327 * - Recursive SRE(match)() returned false, and will be called again
328 * inside a local for/while loop: must be protected between each
329 * loop iteration, since the recursive SRE(match)() could do anything,
330 * and could potentially depend on lastmark.
331 *
332 * For more information, check the discussion at SF patch #712900.
333 */
334#define LASTMARK_SAVE() \
335 do { \
336 ctx->lastmark = state->lastmark; \
337 ctx->lastindex = state->lastindex; \
338 } while (0)
339#define LASTMARK_RESTORE() \
340 do { \
341 state->lastmark = ctx->lastmark; \
342 state->lastindex = ctx->lastindex; \
343 } while (0)
344
345#define RETURN_ERROR(i) do { return i; } while(0)
346#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
347#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
348
349#define RETURN_ON_ERROR(i) \
350 do { if (i < 0) RETURN_ERROR(i); } while (0)
351#define RETURN_ON_SUCCESS(i) \
352 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
353#define RETURN_ON_FAILURE(i) \
354 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
355
356#define SFY(x) #x
357
358#define DATA_STACK_ALLOC(state, type, ptr) \
359do { \
360 alloc_pos = state->data_stack_base; \
361 TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
362 "(%" PY_FORMAT_SIZE_T "d)\n", \
363 SFY(type), alloc_pos, sizeof(type))); \
364 if (sizeof(type) > state->data_stack_size - alloc_pos) { \
365 int j = data_stack_grow(state, sizeof(type)); \
366 if (j < 0) return j; \
367 if (ctx_pos != -1) \
368 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
369 } \
370 ptr = (type*)(state->data_stack+alloc_pos); \
371 state->data_stack_base += sizeof(type); \
372} while (0)
373
374#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
375do { \
376 TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", SFY(type), pos)); \
377 ptr = (type*)(state->data_stack+pos); \
378} while (0)
379
380#define DATA_STACK_PUSH(state, data, size) \
381do { \
382 TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
383 "(%" PY_FORMAT_SIZE_T "d)\n", \
384 data, state->data_stack_base, size)); \
385 if (size > state->data_stack_size - state->data_stack_base) { \
386 int j = data_stack_grow(state, size); \
387 if (j < 0) return j; \
388 if (ctx_pos != -1) \
389 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
390 } \
391 memcpy(state->data_stack+state->data_stack_base, data, size); \
392 state->data_stack_base += size; \
393} while (0)
394
395#define DATA_STACK_POP(state, data, size, discard) \
396do { \
397 TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
398 "(%" PY_FORMAT_SIZE_T "d)\n", \
399 data, state->data_stack_base-size, size)); \
400 memcpy(data, state->data_stack+state->data_stack_base-size, size); \
401 if (discard) \
402 state->data_stack_base -= size; \
403} while (0)
404
405#define DATA_STACK_POP_DISCARD(state, size) \
406do { \
407 TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
408 "(%" PY_FORMAT_SIZE_T "d)\n", \
409 state->data_stack_base-size, size)); \
410 state->data_stack_base -= size; \
411} while(0)
412
413#define DATA_PUSH(x) \
414 DATA_STACK_PUSH(state, (x), sizeof(*(x)))
415#define DATA_POP(x) \
416 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
417#define DATA_POP_DISCARD(x) \
418 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
419#define DATA_ALLOC(t,p) \
420 DATA_STACK_ALLOC(state, t, p)
421#define DATA_LOOKUP_AT(t,p,pos) \
422 DATA_STACK_LOOKUP_AT(state,t,p,pos)
423
424#define MARK_PUSH(lastmark) \
425 do if (lastmark > 0) { \
426 i = lastmark; /* ctx->lastmark may change if reallocated */ \
427 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
428 } while (0)
429#define MARK_POP(lastmark) \
430 do if (lastmark > 0) { \
431 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
432 } while (0)
433#define MARK_POP_KEEP(lastmark) \
434 do if (lastmark > 0) { \
435 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
436 } while (0)
437#define MARK_POP_DISCARD(lastmark) \
438 do if (lastmark > 0) { \
439 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
440 } while (0)
441
442#define JUMP_NONE 0
443#define JUMP_MAX_UNTIL_1 1
444#define JUMP_MAX_UNTIL_2 2
445#define JUMP_MAX_UNTIL_3 3
446#define JUMP_MIN_UNTIL_1 4
447#define JUMP_MIN_UNTIL_2 5
448#define JUMP_MIN_UNTIL_3 6
449#define JUMP_REPEAT 7
450#define JUMP_REPEAT_ONE_1 8
451#define JUMP_REPEAT_ONE_2 9
452#define JUMP_MIN_REPEAT_ONE 10
453#define JUMP_BRANCH 11
454#define JUMP_ASSERT 12
455#define JUMP_ASSERT_NOT 13
456
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200457#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) \
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300458 DATA_ALLOC(SRE(match_context), nextctx); \
459 nextctx->last_ctx_pos = ctx_pos; \
460 nextctx->jump = jumpvalue; \
461 nextctx->pattern = nextpattern; \
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200462 nextctx->match_all = matchall; \
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300463 ctx_pos = alloc_pos; \
464 ctx = nextctx; \
465 goto entrance; \
466 jumplabel: \
467 while (0) /* gcc doesn't like labels at end of scopes */ \
468
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200469#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
470 DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->match_all)
471
472#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
473 DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
474
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300475typedef struct {
476 Py_ssize_t last_ctx_pos;
477 Py_ssize_t jump;
478 SRE_CHAR* ptr;
479 SRE_CODE* pattern;
480 Py_ssize_t count;
481 Py_ssize_t lastmark;
482 Py_ssize_t lastindex;
483 union {
484 SRE_CODE chr;
485 SRE_REPEAT* rep;
486 } u;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200487 int match_all;
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300488} SRE(match_context);
489
490/* check if string matches the given pattern. returns <0 for
491 error, 0 for failure, and 1 for success */
492LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300493SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300494{
495 SRE_CHAR* end = (SRE_CHAR *)state->end;
496 Py_ssize_t alloc_pos, ctx_pos = -1;
497 Py_ssize_t i, ret = 0;
498 Py_ssize_t jump;
499 unsigned int sigcount=0;
500
501 SRE(match_context)* ctx;
502 SRE(match_context)* nextctx;
503
504 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
505
506 DATA_ALLOC(SRE(match_context), ctx);
507 ctx->last_ctx_pos = -1;
508 ctx->jump = JUMP_NONE;
509 ctx->pattern = pattern;
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300510 ctx->match_all = match_all;
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300511 ctx_pos = alloc_pos;
512
513entrance:
514
515 ctx->ptr = (SRE_CHAR *)state->ptr;
516
517 if (ctx->pattern[0] == SRE_OP_INFO) {
518 /* optimization info block */
519 /* <INFO> <1=skip> <2=flags> <3=min> ... */
520 if (ctx->pattern[3] && (Py_uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
521 TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
522 "need %" PY_FORMAT_SIZE_T "d)\n",
523 end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
524 RETURN_FAILURE;
525 }
526 ctx->pattern += ctx->pattern[1] + 1;
527 }
528
529 for (;;) {
530 ++sigcount;
531 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
532 RETURN_ERROR(SRE_ERROR_INTERRUPTED);
533
534 switch (*ctx->pattern++) {
535
536 case SRE_OP_MARK:
537 /* set mark */
538 /* <MARK> <gid> */
539 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
540 ctx->ptr, ctx->pattern[0]));
541 i = ctx->pattern[0];
542 if (i & 1)
543 state->lastindex = i/2 + 1;
544 if (i > state->lastmark) {
545 /* state->lastmark is the highest valid index in the
546 state->mark array. If it is increased by more than 1,
547 the intervening marks must be set to NULL to signal
548 that these marks have not been encountered. */
549 Py_ssize_t j = state->lastmark + 1;
550 while (j < i)
551 state->mark[j++] = NULL;
552 state->lastmark = i;
553 }
554 state->mark[i] = ctx->ptr;
555 ctx->pattern++;
556 break;
557
558 case SRE_OP_LITERAL:
559 /* match literal string */
560 /* <LITERAL> <code> */
561 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
562 ctx->ptr, *ctx->pattern));
563 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
564 RETURN_FAILURE;
565 ctx->pattern++;
566 ctx->ptr++;
567 break;
568
569 case SRE_OP_NOT_LITERAL:
570 /* match anything that is not literal character */
571 /* <NOT_LITERAL> <code> */
572 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
573 ctx->ptr, *ctx->pattern));
574 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
575 RETURN_FAILURE;
576 ctx->pattern++;
577 ctx->ptr++;
578 break;
579
580 case SRE_OP_SUCCESS:
581 /* end of pattern */
582 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200583 if (!ctx->match_all || ctx->ptr == state->end) {
584 state->ptr = ctx->ptr;
585 RETURN_SUCCESS;
586 }
587 RETURN_FAILURE;
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300588
589 case SRE_OP_AT:
590 /* match at given position */
591 /* <AT> <code> */
592 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
593 if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
594 RETURN_FAILURE;
595 ctx->pattern++;
596 break;
597
598 case SRE_OP_CATEGORY:
599 /* match at given category */
600 /* <CATEGORY> <code> */
601 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
602 ctx->ptr, *ctx->pattern));
603 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
604 RETURN_FAILURE;
605 ctx->pattern++;
606 ctx->ptr++;
607 break;
608
609 case SRE_OP_ANY:
610 /* match anything (except a newline) */
611 /* <ANY> */
612 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
613 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
614 RETURN_FAILURE;
615 ctx->ptr++;
616 break;
617
618 case SRE_OP_ANY_ALL:
619 /* match anything */
620 /* <ANY_ALL> */
621 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
622 if (ctx->ptr >= end)
623 RETURN_FAILURE;
624 ctx->ptr++;
625 break;
626
627 case SRE_OP_IN:
628 /* match set member (or non_member) */
629 /* <IN> <skip> <set> */
630 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
631 if (ctx->ptr >= end || !SRE(charset)(ctx->pattern + 1, *ctx->ptr))
632 RETURN_FAILURE;
633 ctx->pattern += ctx->pattern[0];
634 ctx->ptr++;
635 break;
636
637 case SRE_OP_LITERAL_IGNORE:
638 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
639 ctx->pattern, ctx->ptr, ctx->pattern[0]));
640 if (ctx->ptr >= end ||
641 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
642 RETURN_FAILURE;
643 ctx->pattern++;
644 ctx->ptr++;
645 break;
646
647 case SRE_OP_NOT_LITERAL_IGNORE:
648 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
649 ctx->pattern, ctx->ptr, *ctx->pattern));
650 if (ctx->ptr >= end ||
651 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
652 RETURN_FAILURE;
653 ctx->pattern++;
654 ctx->ptr++;
655 break;
656
657 case SRE_OP_IN_IGNORE:
658 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
659 if (ctx->ptr >= end
660 || !SRE(charset)(ctx->pattern+1,
661 (SRE_CODE)state->lower(*ctx->ptr)))
662 RETURN_FAILURE;
663 ctx->pattern += ctx->pattern[0];
664 ctx->ptr++;
665 break;
666
667 case SRE_OP_JUMP:
668 case SRE_OP_INFO:
669 /* jump forward */
670 /* <JUMP> <offset> */
671 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
672 ctx->ptr, ctx->pattern[0]));
673 ctx->pattern += ctx->pattern[0];
674 break;
675
676 case SRE_OP_BRANCH:
677 /* alternation */
678 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
679 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
680 LASTMARK_SAVE();
681 ctx->u.rep = state->repeat;
682 if (ctx->u.rep)
683 MARK_PUSH(ctx->lastmark);
684 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
685 if (ctx->pattern[1] == SRE_OP_LITERAL &&
686 (ctx->ptr >= end ||
687 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
688 continue;
689 if (ctx->pattern[1] == SRE_OP_IN &&
690 (ctx->ptr >= end ||
691 !SRE(charset)(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
692 continue;
693 state->ptr = ctx->ptr;
694 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
695 if (ret) {
696 if (ctx->u.rep)
697 MARK_POP_DISCARD(ctx->lastmark);
698 RETURN_ON_ERROR(ret);
699 RETURN_SUCCESS;
700 }
701 if (ctx->u.rep)
702 MARK_POP_KEEP(ctx->lastmark);
703 LASTMARK_RESTORE();
704 }
705 if (ctx->u.rep)
706 MARK_POP_DISCARD(ctx->lastmark);
707 RETURN_FAILURE;
708
709 case SRE_OP_REPEAT_ONE:
710 /* match repeated sequence (maximizing regexp) */
711
712 /* this operator only works if the repeated item is
713 exactly one character wide, and we're not already
714 collecting backtracking points. for other cases,
715 use the MAX_REPEAT operator */
716
717 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
718
719 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
720 ctx->pattern[1], ctx->pattern[2]));
721
722 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
723 RETURN_FAILURE; /* cannot match */
724
725 state->ptr = ctx->ptr;
726
727 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
728 RETURN_ON_ERROR(ret);
729 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
730 ctx->count = ret;
731 ctx->ptr += ctx->count;
732
733 /* when we arrive here, count contains the number of
734 matches, and ctx->ptr points to the tail of the target
735 string. check if the rest of the pattern matches,
736 and backtrack if not. */
737
738 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
739 RETURN_FAILURE;
740
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200741 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300742 ctx->ptr == state->end) {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300743 /* tail is empty. we're finished */
744 state->ptr = ctx->ptr;
745 RETURN_SUCCESS;
746 }
747
748 LASTMARK_SAVE();
749
750 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
751 /* tail starts with a literal. skip positions where
752 the rest of the pattern cannot possibly match */
753 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
754 for (;;) {
755 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
756 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
757 ctx->ptr--;
758 ctx->count--;
759 }
760 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
761 break;
762 state->ptr = ctx->ptr;
763 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
764 ctx->pattern+ctx->pattern[0]);
765 if (ret) {
766 RETURN_ON_ERROR(ret);
767 RETURN_SUCCESS;
768 }
769
770 LASTMARK_RESTORE();
771
772 ctx->ptr--;
773 ctx->count--;
774 }
775
776 } else {
777 /* general case */
778 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
779 state->ptr = ctx->ptr;
780 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
781 ctx->pattern+ctx->pattern[0]);
782 if (ret) {
783 RETURN_ON_ERROR(ret);
784 RETURN_SUCCESS;
785 }
786 ctx->ptr--;
787 ctx->count--;
788 LASTMARK_RESTORE();
789 }
790 }
791 RETURN_FAILURE;
792
793 case SRE_OP_MIN_REPEAT_ONE:
794 /* match repeated sequence (minimizing regexp) */
795
796 /* this operator only works if the repeated item is
797 exactly one character wide, and we're not already
798 collecting backtracking points. for other cases,
799 use the MIN_REPEAT operator */
800
801 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
802
803 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
804 ctx->pattern[1], ctx->pattern[2]));
805
806 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
807 RETURN_FAILURE; /* cannot match */
808
809 state->ptr = ctx->ptr;
810
811 if (ctx->pattern[1] == 0)
812 ctx->count = 0;
813 else {
814 /* count using pattern min as the maximum */
815 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
816 RETURN_ON_ERROR(ret);
817 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
818 if (ret < (Py_ssize_t) ctx->pattern[1])
819 /* didn't match minimum number of times */
820 RETURN_FAILURE;
821 /* advance past minimum matches of repeat */
822 ctx->count = ret;
823 ctx->ptr += ctx->count;
824 }
825
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200826 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300827 (!match_all || ctx->ptr == state->end)) {
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300828 /* tail is empty. we're finished */
829 state->ptr = ctx->ptr;
830 RETURN_SUCCESS;
831
832 } else {
833 /* general case */
834 LASTMARK_SAVE();
835 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
836 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
837 state->ptr = ctx->ptr;
838 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
839 ctx->pattern+ctx->pattern[0]);
840 if (ret) {
841 RETURN_ON_ERROR(ret);
842 RETURN_SUCCESS;
843 }
844 state->ptr = ctx->ptr;
845 ret = SRE(count)(state, ctx->pattern+3, 1);
846 RETURN_ON_ERROR(ret);
847 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
848 if (ret == 0)
849 break;
850 assert(ret == 1);
851 ctx->ptr++;
852 ctx->count++;
853 LASTMARK_RESTORE();
854 }
855 }
856 RETURN_FAILURE;
857
858 case SRE_OP_REPEAT:
859 /* create repeat context. all the hard work is done
860 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
861 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
862 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
863 ctx->pattern[1], ctx->pattern[2]));
864
865 /* install new repeat context */
866 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
867 if (!ctx->u.rep) {
868 PyErr_NoMemory();
869 RETURN_FAILURE;
870 }
871 ctx->u.rep->count = -1;
872 ctx->u.rep->pattern = ctx->pattern;
873 ctx->u.rep->prev = state->repeat;
874 ctx->u.rep->last_ptr = NULL;
875 state->repeat = ctx->u.rep;
876
877 state->ptr = ctx->ptr;
878 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
879 state->repeat = ctx->u.rep->prev;
880 PyObject_FREE(ctx->u.rep);
881
882 if (ret) {
883 RETURN_ON_ERROR(ret);
884 RETURN_SUCCESS;
885 }
886 RETURN_FAILURE;
887
888 case SRE_OP_MAX_UNTIL:
889 /* maximizing repeat */
890 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
891
892 /* FIXME: we probably need to deal with zero-width
893 matches in here... */
894
895 ctx->u.rep = state->repeat;
896 if (!ctx->u.rep)
897 RETURN_ERROR(SRE_ERROR_STATE);
898
899 state->ptr = ctx->ptr;
900
901 ctx->count = ctx->u.rep->count+1;
902
903 TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
904 ctx->ptr, ctx->count));
905
906 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
907 /* not enough matches */
908 ctx->u.rep->count = ctx->count;
909 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
910 ctx->u.rep->pattern+3);
911 if (ret) {
912 RETURN_ON_ERROR(ret);
913 RETURN_SUCCESS;
914 }
915 ctx->u.rep->count = ctx->count-1;
916 state->ptr = ctx->ptr;
917 RETURN_FAILURE;
918 }
919
920 if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
921 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
922 state->ptr != ctx->u.rep->last_ptr) {
923 /* we may have enough matches, but if we can
924 match another item, do so */
925 ctx->u.rep->count = ctx->count;
926 LASTMARK_SAVE();
927 MARK_PUSH(ctx->lastmark);
928 /* zero-width match protection */
929 DATA_PUSH(&ctx->u.rep->last_ptr);
930 ctx->u.rep->last_ptr = state->ptr;
931 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
932 ctx->u.rep->pattern+3);
933 DATA_POP(&ctx->u.rep->last_ptr);
934 if (ret) {
935 MARK_POP_DISCARD(ctx->lastmark);
936 RETURN_ON_ERROR(ret);
937 RETURN_SUCCESS;
938 }
939 MARK_POP(ctx->lastmark);
940 LASTMARK_RESTORE();
941 ctx->u.rep->count = ctx->count-1;
942 state->ptr = ctx->ptr;
943 }
944
945 /* cannot match more repeated items here. make sure the
946 tail matches */
947 state->repeat = ctx->u.rep->prev;
948 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
949 RETURN_ON_SUCCESS(ret);
950 state->repeat = ctx->u.rep;
951 state->ptr = ctx->ptr;
952 RETURN_FAILURE;
953
954 case SRE_OP_MIN_UNTIL:
955 /* minimizing repeat */
956 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
957
958 ctx->u.rep = state->repeat;
959 if (!ctx->u.rep)
960 RETURN_ERROR(SRE_ERROR_STATE);
961
962 state->ptr = ctx->ptr;
963
964 ctx->count = ctx->u.rep->count+1;
965
966 TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
967 ctx->ptr, ctx->count, ctx->u.rep->pattern));
968
969 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
970 /* not enough matches */
971 ctx->u.rep->count = ctx->count;
972 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
973 ctx->u.rep->pattern+3);
974 if (ret) {
975 RETURN_ON_ERROR(ret);
976 RETURN_SUCCESS;
977 }
978 ctx->u.rep->count = ctx->count-1;
979 state->ptr = ctx->ptr;
980 RETURN_FAILURE;
981 }
982
983 LASTMARK_SAVE();
984
985 /* see if the tail matches */
986 state->repeat = ctx->u.rep->prev;
987 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
988 if (ret) {
989 RETURN_ON_ERROR(ret);
990 RETURN_SUCCESS;
991 }
992
993 state->repeat = ctx->u.rep;
994 state->ptr = ctx->ptr;
995
996 LASTMARK_RESTORE();
997
998 if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
999 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1000 state->ptr == ctx->u.rep->last_ptr)
1001 RETURN_FAILURE;
1002
1003 ctx->u.rep->count = ctx->count;
1004 /* zero-width match protection */
1005 DATA_PUSH(&ctx->u.rep->last_ptr);
1006 ctx->u.rep->last_ptr = state->ptr;
1007 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1008 ctx->u.rep->pattern+3);
1009 DATA_POP(&ctx->u.rep->last_ptr);
1010 if (ret) {
1011 RETURN_ON_ERROR(ret);
1012 RETURN_SUCCESS;
1013 }
1014 ctx->u.rep->count = ctx->count-1;
1015 state->ptr = ctx->ptr;
1016 RETURN_FAILURE;
1017
1018 case SRE_OP_GROUPREF:
1019 /* match backreference */
1020 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1021 ctx->ptr, ctx->pattern[0]));
1022 i = ctx->pattern[0];
1023 {
1024 Py_ssize_t groupref = i+i;
1025 if (groupref >= state->lastmark) {
1026 RETURN_FAILURE;
1027 } else {
1028 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1029 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1030 if (!p || !e || e < p)
1031 RETURN_FAILURE;
1032 while (p < e) {
1033 if (ctx->ptr >= end || *ctx->ptr != *p)
1034 RETURN_FAILURE;
1035 p++;
1036 ctx->ptr++;
1037 }
1038 }
1039 }
1040 ctx->pattern++;
1041 break;
1042
1043 case SRE_OP_GROUPREF_IGNORE:
1044 /* match backreference */
1045 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1046 ctx->ptr, ctx->pattern[0]));
1047 i = ctx->pattern[0];
1048 {
1049 Py_ssize_t groupref = i+i;
1050 if (groupref >= state->lastmark) {
1051 RETURN_FAILURE;
1052 } else {
1053 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1054 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1055 if (!p || !e || e < p)
1056 RETURN_FAILURE;
1057 while (p < e) {
1058 if (ctx->ptr >= end ||
1059 state->lower(*ctx->ptr) != state->lower(*p))
1060 RETURN_FAILURE;
1061 p++;
1062 ctx->ptr++;
1063 }
1064 }
1065 }
1066 ctx->pattern++;
1067 break;
1068
1069 case SRE_OP_GROUPREF_EXISTS:
1070 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1071 ctx->ptr, ctx->pattern[0]));
1072 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1073 i = ctx->pattern[0];
1074 {
1075 Py_ssize_t groupref = i+i;
1076 if (groupref >= state->lastmark) {
1077 ctx->pattern += ctx->pattern[1];
1078 break;
1079 } else {
1080 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1081 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1082 if (!p || !e || e < p) {
1083 ctx->pattern += ctx->pattern[1];
1084 break;
1085 }
1086 }
1087 }
1088 ctx->pattern += 2;
1089 break;
1090
1091 case SRE_OP_ASSERT:
1092 /* assert subpattern */
1093 /* <ASSERT> <skip> <back> <pattern> */
1094 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1095 ctx->ptr, ctx->pattern[1]));
1096 state->ptr = ctx->ptr - ctx->pattern[1];
1097 if (state->ptr < state->beginning)
1098 RETURN_FAILURE;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001099 DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001100 RETURN_ON_FAILURE(ret);
1101 ctx->pattern += ctx->pattern[0];
1102 break;
1103
1104 case SRE_OP_ASSERT_NOT:
1105 /* assert not subpattern */
1106 /* <ASSERT_NOT> <skip> <back> <pattern> */
1107 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1108 ctx->ptr, ctx->pattern[1]));
1109 state->ptr = ctx->ptr - ctx->pattern[1];
1110 if (state->ptr >= state->beginning) {
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001111 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001112 if (ret) {
1113 RETURN_ON_ERROR(ret);
1114 RETURN_FAILURE;
1115 }
1116 }
1117 ctx->pattern += ctx->pattern[0];
1118 break;
1119
1120 case SRE_OP_FAILURE:
1121 /* immediate failure */
1122 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1123 RETURN_FAILURE;
1124
1125 default:
1126 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1127 ctx->pattern[-1]));
1128 RETURN_ERROR(SRE_ERROR_ILLEGAL);
1129 }
1130 }
1131
1132exit:
1133 ctx_pos = ctx->last_ctx_pos;
1134 jump = ctx->jump;
1135 DATA_POP_DISCARD(ctx);
1136 if (ctx_pos == -1)
1137 return ret;
1138 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1139
1140 switch (jump) {
1141 case JUMP_MAX_UNTIL_2:
1142 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1143 goto jump_max_until_2;
1144 case JUMP_MAX_UNTIL_3:
1145 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1146 goto jump_max_until_3;
1147 case JUMP_MIN_UNTIL_2:
1148 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1149 goto jump_min_until_2;
1150 case JUMP_MIN_UNTIL_3:
1151 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1152 goto jump_min_until_3;
1153 case JUMP_BRANCH:
1154 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1155 goto jump_branch;
1156 case JUMP_MAX_UNTIL_1:
1157 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1158 goto jump_max_until_1;
1159 case JUMP_MIN_UNTIL_1:
1160 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1161 goto jump_min_until_1;
1162 case JUMP_REPEAT:
1163 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1164 goto jump_repeat;
1165 case JUMP_REPEAT_ONE_1:
1166 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1167 goto jump_repeat_one_1;
1168 case JUMP_REPEAT_ONE_2:
1169 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1170 goto jump_repeat_one_2;
1171 case JUMP_MIN_REPEAT_ONE:
1172 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1173 goto jump_min_repeat_one;
1174 case JUMP_ASSERT:
1175 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1176 goto jump_assert;
1177 case JUMP_ASSERT_NOT:
1178 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1179 goto jump_assert_not;
1180 case JUMP_NONE:
1181 TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1182 ctx->ptr, ret));
1183 break;
1184 }
1185
1186 return ret; /* should never get here */
1187}
1188
1189LOCAL(Py_ssize_t)
1190SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1191{
1192 SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1193 SRE_CHAR* end = (SRE_CHAR *)state->end;
1194 Py_ssize_t status = 0;
1195 Py_ssize_t prefix_len = 0;
1196 Py_ssize_t prefix_skip = 0;
1197 SRE_CODE* prefix = NULL;
1198 SRE_CODE* charset = NULL;
1199 SRE_CODE* overlap = NULL;
1200 int flags = 0;
1201
1202 if (pattern[0] == SRE_OP_INFO) {
1203 /* optimization info block */
1204 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1205
1206 flags = pattern[2];
1207
1208 if (pattern[3] > 1) {
1209 /* adjust end point (but make sure we leave at least one
1210 character in there, so literal search will work) */
1211 end -= pattern[3] - 1;
1212 if (end <= ptr)
1213 end = ptr;
1214 }
1215
1216 if (flags & SRE_INFO_PREFIX) {
1217 /* pattern starts with a known prefix */
1218 /* <length> <skip> <prefix data> <overlap data> */
1219 prefix_len = pattern[5];
1220 prefix_skip = pattern[6];
1221 prefix = pattern + 7;
1222 overlap = prefix + prefix_len - 1;
1223 } else if (flags & SRE_INFO_CHARSET)
1224 /* pattern starts with a character from a known set */
1225 /* <charset> */
1226 charset = pattern + 5;
1227
1228 pattern += 1 + pattern[1];
1229 }
1230
1231 TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1232 prefix, prefix_len, prefix_skip));
1233 TRACE(("charset = %p\n", charset));
1234
1235#if defined(USE_FAST_SEARCH)
1236 if (prefix_len > 1) {
1237 /* pattern starts with a known prefix. use the overlap
1238 table to skip forward as fast as we possibly can */
1239 Py_ssize_t i = 0;
1240
1241 end = (SRE_CHAR *)state->end;
1242 if (prefix_len > end - ptr)
1243 return 0;
1244#if SIZEOF_SRE_CHAR < 4
1245 for (i = 0; i < prefix_len; i++)
1246 if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1247 return 0; /* literal can't match: doesn't fit in char width */
1248#endif
1249 while (ptr < end) {
1250 SRE_CHAR c = (SRE_CHAR) prefix[0];
1251 while (*ptr++ != c) {
1252 if (ptr >= end)
1253 return 0;
1254 }
1255 if (ptr >= end)
1256 return 0;
1257
1258 i = 1;
1259 do {
1260 if (*ptr == (SRE_CHAR) prefix[i]) {
1261 if (++i != prefix_len) {
1262 if (++ptr >= end)
1263 return 0;
1264 continue;
1265 }
1266 /* found a potential match */
1267 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1268 state->start = ptr - (prefix_len - 1);
1269 state->ptr = ptr - (prefix_len - prefix_skip - 1);
1270 if (flags & SRE_INFO_LITERAL)
1271 return 1; /* we got all of it */
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001272 status = SRE(match)(state, pattern + 2*prefix_skip, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001273 if (status != 0)
1274 return status;
1275 /* close but no cigar -- try again */
1276 if (++ptr >= end)
1277 return 0;
1278 }
1279 i = overlap[i];
1280 } while (i != 0);
1281 }
1282 return 0;
1283 }
1284#endif
1285
1286 if (pattern[0] == SRE_OP_LITERAL) {
1287 /* pattern starts with a literal character. this is used
1288 for short prefixes, and if fast search is disabled */
1289 SRE_CHAR c = (SRE_CHAR) pattern[1];
1290#if SIZEOF_SRE_CHAR < 4
1291 if ((SRE_CODE) c != pattern[1])
1292 return 0; /* literal can't match: doesn't fit in char width */
1293#endif
1294 end = (SRE_CHAR *)state->end;
1295 while (ptr < end) {
1296 while (*ptr != c) {
1297 if (++ptr >= end)
1298 return 0;
1299 }
1300 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1301 state->start = ptr;
1302 state->ptr = ++ptr;
1303 if (flags & SRE_INFO_LITERAL)
1304 return 1; /* we got all of it */
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001305 status = SRE(match)(state, pattern + 2, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001306 if (status != 0)
1307 break;
1308 }
1309 } else if (charset) {
1310 /* pattern starts with a character from a known set */
1311 end = (SRE_CHAR *)state->end;
1312 for (;;) {
1313 while (ptr < end && !SRE(charset)(charset, *ptr))
1314 ptr++;
1315 if (ptr >= end)
1316 return 0;
1317 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1318 state->start = ptr;
1319 state->ptr = ptr;
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001320 status = SRE(match)(state, pattern, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001321 if (status != 0)
1322 break;
1323 ptr++;
1324 }
1325 } else
1326 /* general case */
1327 while (ptr <= end) {
1328 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1329 state->start = state->ptr = ptr++;
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001330 status = SRE(match)(state, pattern, 0);
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001331 if (status != 0)
1332 break;
1333 }
1334
1335 return status;
1336}
1337
1338#undef SRE_CHAR
1339#undef SIZEOF_SRE_CHAR
1340#undef SRE
1341
1342/* vim:ts=4:sw=4:et
1343*/