blob: 214c22af7e094e6323393df835f1505a3eee3bcb [file] [log] [blame]
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +03001/*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
7 *
8 * See the _sre.c file for information on usage and redistribution.
9 */
10
11/* String matching engine */
12
13/* This file is included three times, with different character settings */
14
15LOCAL(int)
16SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
17{
18 /* check if pointer is at given position */
19
20 Py_ssize_t thisp, thatp;
21
22 switch (at) {
23
24 case SRE_AT_BEGINNING:
25 case SRE_AT_BEGINNING_STRING:
26 return ((void*) ptr == state->beginning);
27
28 case SRE_AT_BEGINNING_LINE:
29 return ((void*) ptr == state->beginning ||
30 SRE_IS_LINEBREAK((int) ptr[-1]));
31
32 case SRE_AT_END:
33 return (((void*) (ptr+1) == state->end &&
34 SRE_IS_LINEBREAK((int) ptr[0])) ||
35 ((void*) ptr == state->end));
36
37 case SRE_AT_END_LINE:
38 return ((void*) ptr == state->end ||
39 SRE_IS_LINEBREAK((int) ptr[0]));
40
41 case SRE_AT_END_STRING:
42 return ((void*) ptr == state->end);
43
44 case SRE_AT_BOUNDARY:
45 if (state->beginning == state->end)
46 return 0;
47 thatp = ((void*) ptr > state->beginning) ?
48 SRE_IS_WORD((int) ptr[-1]) : 0;
49 thisp = ((void*) ptr < state->end) ?
50 SRE_IS_WORD((int) ptr[0]) : 0;
51 return thisp != thatp;
52
53 case SRE_AT_NON_BOUNDARY:
54 if (state->beginning == state->end)
55 return 0;
56 thatp = ((void*) ptr > state->beginning) ?
57 SRE_IS_WORD((int) ptr[-1]) : 0;
58 thisp = ((void*) ptr < state->end) ?
59 SRE_IS_WORD((int) ptr[0]) : 0;
60 return thisp == thatp;
61
62 case SRE_AT_LOC_BOUNDARY:
63 if (state->beginning == state->end)
64 return 0;
65 thatp = ((void*) ptr > state->beginning) ?
66 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67 thisp = ((void*) ptr < state->end) ?
68 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69 return thisp != thatp;
70
71 case SRE_AT_LOC_NON_BOUNDARY:
72 if (state->beginning == state->end)
73 return 0;
74 thatp = ((void*) ptr > state->beginning) ?
75 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76 thisp = ((void*) ptr < state->end) ?
77 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78 return thisp == thatp;
79
80 case SRE_AT_UNI_BOUNDARY:
81 if (state->beginning == state->end)
82 return 0;
83 thatp = ((void*) ptr > state->beginning) ?
84 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85 thisp = ((void*) ptr < state->end) ?
86 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87 return thisp != thatp;
88
89 case SRE_AT_UNI_NON_BOUNDARY:
90 if (state->beginning == state->end)
91 return 0;
92 thatp = ((void*) ptr > state->beginning) ?
93 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94 thisp = ((void*) ptr < state->end) ?
95 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96 return thisp == thatp;
97
98 }
99
100 return 0;
101}
102
103LOCAL(int)
104SRE(charset)(SRE_CODE* set, SRE_CODE ch)
105{
106 /* check if character is a member of the given set */
107
108 int ok = 1;
109
110 for (;;) {
111 switch (*set++) {
112
113 case SRE_OP_FAILURE:
114 return !ok;
115
116 case SRE_OP_LITERAL:
117 /* <LITERAL> <code> */
118 if (ch == set[0])
119 return ok;
120 set++;
121 break;
122
123 case SRE_OP_CATEGORY:
124 /* <CATEGORY> <code> */
125 if (sre_category(set[0], (int) ch))
126 return ok;
127 set++;
128 break;
129
130 case SRE_OP_CHARSET:
131 /* <CHARSET> <bitmap> */
132 if (ch < 256 &&
133 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134 return ok;
135 set += 256/SRE_CODE_BITS;
136 break;
137
138 case SRE_OP_RANGE:
139 /* <RANGE> <lower> <upper> */
140 if (set[0] <= ch && ch <= set[1])
141 return ok;
142 set += 2;
143 break;
144
145 case SRE_OP_NEGATE:
146 ok = !ok;
147 break;
148
149 case SRE_OP_BIGCHARSET:
150 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
151 {
152 Py_ssize_t count, block;
153 count = *(set++);
154
155 if (ch < 0x10000u)
156 block = ((unsigned char*)set)[ch >> 8];
157 else
158 block = -1;
159 set += 256/sizeof(SRE_CODE);
160 if (block >=0 &&
161 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
162 (1u << (ch & (SRE_CODE_BITS-1)))))
163 return ok;
164 set += count * (256/SRE_CODE_BITS);
165 break;
166 }
167
168 default:
169 /* internal error -- there's not much we can do about it
170 here, so let's just pretend it didn't match... */
171 return 0;
172 }
173 }
174}
175
176LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern);
177
178LOCAL(Py_ssize_t)
179SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
180{
181 SRE_CODE chr;
182 SRE_CHAR c;
183 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
184 SRE_CHAR* end = (SRE_CHAR *)state->end;
185 Py_ssize_t i;
186
187 /* adjust end */
188 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
189 end = ptr + maxcount;
190
191 switch (pattern[0]) {
192
193 case SRE_OP_IN:
194 /* repeated set */
195 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
196 while (ptr < end && SRE(charset)(pattern + 2, *ptr))
197 ptr++;
198 break;
199
200 case SRE_OP_ANY:
201 /* repeated dot wildcard. */
202 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
203 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
204 ptr++;
205 break;
206
207 case SRE_OP_ANY_ALL:
208 /* repeated dot wildcard. skip to the end of the target
209 string, and backtrack from there */
210 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
211 ptr = end;
212 break;
213
214 case SRE_OP_LITERAL:
215 /* repeated literal */
216 chr = pattern[1];
217 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
218 c = (SRE_CHAR) chr;
219#if SIZEOF_SRE_CHAR < 4
220 if ((SRE_CODE) c != chr)
221 ; /* literal can't match: doesn't fit in char width */
222 else
223#endif
224 while (ptr < end && *ptr == c)
225 ptr++;
226 break;
227
228 case SRE_OP_LITERAL_IGNORE:
229 /* repeated literal */
230 chr = pattern[1];
231 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
232 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
233 ptr++;
234 break;
235
236 case SRE_OP_NOT_LITERAL:
237 /* repeated non-literal */
238 chr = pattern[1];
239 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
240 c = (SRE_CHAR) chr;
241#if SIZEOF_SRE_CHAR < 4
242 if ((SRE_CODE) c != chr)
243 ptr = end; /* literal can't match: doesn't fit in char width */
244 else
245#endif
246 while (ptr < end && *ptr != c)
247 ptr++;
248 break;
249
250 case SRE_OP_NOT_LITERAL_IGNORE:
251 /* repeated non-literal */
252 chr = pattern[1];
253 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
254 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
255 ptr++;
256 break;
257
258 default:
259 /* repeated single character pattern */
260 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
261 while ((SRE_CHAR*) state->ptr < end) {
262 i = SRE(match)(state, pattern);
263 if (i < 0)
264 return i;
265 if (!i)
266 break;
267 }
268 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
269 (SRE_CHAR*) state->ptr - ptr));
270 return (SRE_CHAR*) state->ptr - ptr;
271 }
272
273 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
274 ptr - (SRE_CHAR*) state->ptr));
275 return ptr - (SRE_CHAR*) state->ptr;
276}
277
278#if 0 /* not used in this release */
279LOCAL(int)
280SRE(info)(SRE_STATE* state, SRE_CODE* pattern)
281{
282 /* check if an SRE_OP_INFO block matches at the current position.
283 returns the number of SRE_CODE objects to skip if successful, 0
284 if no match */
285
286 SRE_CHAR* end = (SRE_CHAR*) state->end;
287 SRE_CHAR* ptr = (SRE_CHAR*) state->ptr;
288 Py_ssize_t i;
289
290 /* check minimal length */
291 if (pattern[3] && end - ptr < pattern[3])
292 return 0;
293
294 /* check known prefix */
295 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
296 /* <length> <skip> <prefix data> <overlap data> */
297 for (i = 0; i < pattern[5]; i++)
298 if ((SRE_CODE) ptr[i] != pattern[7 + i])
299 return 0;
300 return pattern[0] + 2 * pattern[6];
301 }
302 return pattern[0];
303}
304#endif
305
306/* The macros below should be used to protect recursive SRE(match)()
307 * calls that *failed* and do *not* return immediately (IOW, those
308 * that will backtrack). Explaining:
309 *
310 * - Recursive SRE(match)() returned true: that's usually a success
311 * (besides atypical cases like ASSERT_NOT), therefore there's no
312 * reason to restore lastmark;
313 *
314 * - Recursive SRE(match)() returned false but the current SRE(match)()
315 * is returning to the caller: If the current SRE(match)() is the
316 * top function of the recursion, returning false will be a matching
317 * failure, and it doesn't matter where lastmark is pointing to.
318 * If it's *not* the top function, it will be a recursive SRE(match)()
319 * failure by itself, and the calling SRE(match)() will have to deal
320 * with the failure by the same rules explained here (it will restore
321 * lastmark by itself if necessary);
322 *
323 * - Recursive SRE(match)() returned false, and will continue the
324 * outside 'for' loop: must be protected when breaking, since the next
325 * OP could potentially depend on lastmark;
326 *
327 * - Recursive SRE(match)() returned false, and will be called again
328 * inside a local for/while loop: must be protected between each
329 * loop iteration, since the recursive SRE(match)() could do anything,
330 * and could potentially depend on lastmark.
331 *
332 * For more information, check the discussion at SF patch #712900.
333 */
334#define LASTMARK_SAVE() \
335 do { \
336 ctx->lastmark = state->lastmark; \
337 ctx->lastindex = state->lastindex; \
338 } while (0)
339#define LASTMARK_RESTORE() \
340 do { \
341 state->lastmark = ctx->lastmark; \
342 state->lastindex = ctx->lastindex; \
343 } while (0)
344
345#define RETURN_ERROR(i) do { return i; } while(0)
346#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
347#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
348
349#define RETURN_ON_ERROR(i) \
350 do { if (i < 0) RETURN_ERROR(i); } while (0)
351#define RETURN_ON_SUCCESS(i) \
352 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
353#define RETURN_ON_FAILURE(i) \
354 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
355
356#define SFY(x) #x
357
358#define DATA_STACK_ALLOC(state, type, ptr) \
359do { \
360 alloc_pos = state->data_stack_base; \
361 TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
362 "(%" PY_FORMAT_SIZE_T "d)\n", \
363 SFY(type), alloc_pos, sizeof(type))); \
364 if (sizeof(type) > state->data_stack_size - alloc_pos) { \
365 int j = data_stack_grow(state, sizeof(type)); \
366 if (j < 0) return j; \
367 if (ctx_pos != -1) \
368 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
369 } \
370 ptr = (type*)(state->data_stack+alloc_pos); \
371 state->data_stack_base += sizeof(type); \
372} while (0)
373
374#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
375do { \
376 TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", SFY(type), pos)); \
377 ptr = (type*)(state->data_stack+pos); \
378} while (0)
379
380#define DATA_STACK_PUSH(state, data, size) \
381do { \
382 TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
383 "(%" PY_FORMAT_SIZE_T "d)\n", \
384 data, state->data_stack_base, size)); \
385 if (size > state->data_stack_size - state->data_stack_base) { \
386 int j = data_stack_grow(state, size); \
387 if (j < 0) return j; \
388 if (ctx_pos != -1) \
389 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
390 } \
391 memcpy(state->data_stack+state->data_stack_base, data, size); \
392 state->data_stack_base += size; \
393} while (0)
394
395#define DATA_STACK_POP(state, data, size, discard) \
396do { \
397 TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
398 "(%" PY_FORMAT_SIZE_T "d)\n", \
399 data, state->data_stack_base-size, size)); \
400 memcpy(data, state->data_stack+state->data_stack_base-size, size); \
401 if (discard) \
402 state->data_stack_base -= size; \
403} while (0)
404
405#define DATA_STACK_POP_DISCARD(state, size) \
406do { \
407 TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
408 "(%" PY_FORMAT_SIZE_T "d)\n", \
409 state->data_stack_base-size, size)); \
410 state->data_stack_base -= size; \
411} while(0)
412
413#define DATA_PUSH(x) \
414 DATA_STACK_PUSH(state, (x), sizeof(*(x)))
415#define DATA_POP(x) \
416 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
417#define DATA_POP_DISCARD(x) \
418 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
419#define DATA_ALLOC(t,p) \
420 DATA_STACK_ALLOC(state, t, p)
421#define DATA_LOOKUP_AT(t,p,pos) \
422 DATA_STACK_LOOKUP_AT(state,t,p,pos)
423
424#define MARK_PUSH(lastmark) \
425 do if (lastmark > 0) { \
426 i = lastmark; /* ctx->lastmark may change if reallocated */ \
427 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
428 } while (0)
429#define MARK_POP(lastmark) \
430 do if (lastmark > 0) { \
431 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
432 } while (0)
433#define MARK_POP_KEEP(lastmark) \
434 do if (lastmark > 0) { \
435 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
436 } while (0)
437#define MARK_POP_DISCARD(lastmark) \
438 do if (lastmark > 0) { \
439 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
440 } while (0)
441
442#define JUMP_NONE 0
443#define JUMP_MAX_UNTIL_1 1
444#define JUMP_MAX_UNTIL_2 2
445#define JUMP_MAX_UNTIL_3 3
446#define JUMP_MIN_UNTIL_1 4
447#define JUMP_MIN_UNTIL_2 5
448#define JUMP_MIN_UNTIL_3 6
449#define JUMP_REPEAT 7
450#define JUMP_REPEAT_ONE_1 8
451#define JUMP_REPEAT_ONE_2 9
452#define JUMP_MIN_REPEAT_ONE 10
453#define JUMP_BRANCH 11
454#define JUMP_ASSERT 12
455#define JUMP_ASSERT_NOT 13
456
457#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
458 DATA_ALLOC(SRE(match_context), nextctx); \
459 nextctx->last_ctx_pos = ctx_pos; \
460 nextctx->jump = jumpvalue; \
461 nextctx->pattern = nextpattern; \
462 ctx_pos = alloc_pos; \
463 ctx = nextctx; \
464 goto entrance; \
465 jumplabel: \
466 while (0) /* gcc doesn't like labels at end of scopes */ \
467
468typedef struct {
469 Py_ssize_t last_ctx_pos;
470 Py_ssize_t jump;
471 SRE_CHAR* ptr;
472 SRE_CODE* pattern;
473 Py_ssize_t count;
474 Py_ssize_t lastmark;
475 Py_ssize_t lastindex;
476 union {
477 SRE_CODE chr;
478 SRE_REPEAT* rep;
479 } u;
480} SRE(match_context);
481
482/* check if string matches the given pattern. returns <0 for
483 error, 0 for failure, and 1 for success */
484LOCAL(Py_ssize_t)
485SRE(match)(SRE_STATE* state, SRE_CODE* pattern)
486{
487 SRE_CHAR* end = (SRE_CHAR *)state->end;
488 Py_ssize_t alloc_pos, ctx_pos = -1;
489 Py_ssize_t i, ret = 0;
490 Py_ssize_t jump;
491 unsigned int sigcount=0;
492
493 SRE(match_context)* ctx;
494 SRE(match_context)* nextctx;
495
496 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
497
498 DATA_ALLOC(SRE(match_context), ctx);
499 ctx->last_ctx_pos = -1;
500 ctx->jump = JUMP_NONE;
501 ctx->pattern = pattern;
502 ctx_pos = alloc_pos;
503
504entrance:
505
506 ctx->ptr = (SRE_CHAR *)state->ptr;
507
508 if (ctx->pattern[0] == SRE_OP_INFO) {
509 /* optimization info block */
510 /* <INFO> <1=skip> <2=flags> <3=min> ... */
511 if (ctx->pattern[3] && (Py_uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
512 TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
513 "need %" PY_FORMAT_SIZE_T "d)\n",
514 end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
515 RETURN_FAILURE;
516 }
517 ctx->pattern += ctx->pattern[1] + 1;
518 }
519
520 for (;;) {
521 ++sigcount;
522 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
523 RETURN_ERROR(SRE_ERROR_INTERRUPTED);
524
525 switch (*ctx->pattern++) {
526
527 case SRE_OP_MARK:
528 /* set mark */
529 /* <MARK> <gid> */
530 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
531 ctx->ptr, ctx->pattern[0]));
532 i = ctx->pattern[0];
533 if (i & 1)
534 state->lastindex = i/2 + 1;
535 if (i > state->lastmark) {
536 /* state->lastmark is the highest valid index in the
537 state->mark array. If it is increased by more than 1,
538 the intervening marks must be set to NULL to signal
539 that these marks have not been encountered. */
540 Py_ssize_t j = state->lastmark + 1;
541 while (j < i)
542 state->mark[j++] = NULL;
543 state->lastmark = i;
544 }
545 state->mark[i] = ctx->ptr;
546 ctx->pattern++;
547 break;
548
549 case SRE_OP_LITERAL:
550 /* match literal string */
551 /* <LITERAL> <code> */
552 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
553 ctx->ptr, *ctx->pattern));
554 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
555 RETURN_FAILURE;
556 ctx->pattern++;
557 ctx->ptr++;
558 break;
559
560 case SRE_OP_NOT_LITERAL:
561 /* match anything that is not literal character */
562 /* <NOT_LITERAL> <code> */
563 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
564 ctx->ptr, *ctx->pattern));
565 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
566 RETURN_FAILURE;
567 ctx->pattern++;
568 ctx->ptr++;
569 break;
570
571 case SRE_OP_SUCCESS:
572 /* end of pattern */
573 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
574 state->ptr = ctx->ptr;
575 RETURN_SUCCESS;
576
577 case SRE_OP_AT:
578 /* match at given position */
579 /* <AT> <code> */
580 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
581 if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
582 RETURN_FAILURE;
583 ctx->pattern++;
584 break;
585
586 case SRE_OP_CATEGORY:
587 /* match at given category */
588 /* <CATEGORY> <code> */
589 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
590 ctx->ptr, *ctx->pattern));
591 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
592 RETURN_FAILURE;
593 ctx->pattern++;
594 ctx->ptr++;
595 break;
596
597 case SRE_OP_ANY:
598 /* match anything (except a newline) */
599 /* <ANY> */
600 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
601 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
602 RETURN_FAILURE;
603 ctx->ptr++;
604 break;
605
606 case SRE_OP_ANY_ALL:
607 /* match anything */
608 /* <ANY_ALL> */
609 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
610 if (ctx->ptr >= end)
611 RETURN_FAILURE;
612 ctx->ptr++;
613 break;
614
615 case SRE_OP_IN:
616 /* match set member (or non_member) */
617 /* <IN> <skip> <set> */
618 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
619 if (ctx->ptr >= end || !SRE(charset)(ctx->pattern + 1, *ctx->ptr))
620 RETURN_FAILURE;
621 ctx->pattern += ctx->pattern[0];
622 ctx->ptr++;
623 break;
624
625 case SRE_OP_LITERAL_IGNORE:
626 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
627 ctx->pattern, ctx->ptr, ctx->pattern[0]));
628 if (ctx->ptr >= end ||
629 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
630 RETURN_FAILURE;
631 ctx->pattern++;
632 ctx->ptr++;
633 break;
634
635 case SRE_OP_NOT_LITERAL_IGNORE:
636 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
637 ctx->pattern, ctx->ptr, *ctx->pattern));
638 if (ctx->ptr >= end ||
639 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
640 RETURN_FAILURE;
641 ctx->pattern++;
642 ctx->ptr++;
643 break;
644
645 case SRE_OP_IN_IGNORE:
646 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
647 if (ctx->ptr >= end
648 || !SRE(charset)(ctx->pattern+1,
649 (SRE_CODE)state->lower(*ctx->ptr)))
650 RETURN_FAILURE;
651 ctx->pattern += ctx->pattern[0];
652 ctx->ptr++;
653 break;
654
655 case SRE_OP_JUMP:
656 case SRE_OP_INFO:
657 /* jump forward */
658 /* <JUMP> <offset> */
659 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
660 ctx->ptr, ctx->pattern[0]));
661 ctx->pattern += ctx->pattern[0];
662 break;
663
664 case SRE_OP_BRANCH:
665 /* alternation */
666 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
667 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
668 LASTMARK_SAVE();
669 ctx->u.rep = state->repeat;
670 if (ctx->u.rep)
671 MARK_PUSH(ctx->lastmark);
672 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
673 if (ctx->pattern[1] == SRE_OP_LITERAL &&
674 (ctx->ptr >= end ||
675 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
676 continue;
677 if (ctx->pattern[1] == SRE_OP_IN &&
678 (ctx->ptr >= end ||
679 !SRE(charset)(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
680 continue;
681 state->ptr = ctx->ptr;
682 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
683 if (ret) {
684 if (ctx->u.rep)
685 MARK_POP_DISCARD(ctx->lastmark);
686 RETURN_ON_ERROR(ret);
687 RETURN_SUCCESS;
688 }
689 if (ctx->u.rep)
690 MARK_POP_KEEP(ctx->lastmark);
691 LASTMARK_RESTORE();
692 }
693 if (ctx->u.rep)
694 MARK_POP_DISCARD(ctx->lastmark);
695 RETURN_FAILURE;
696
697 case SRE_OP_REPEAT_ONE:
698 /* match repeated sequence (maximizing regexp) */
699
700 /* this operator only works if the repeated item is
701 exactly one character wide, and we're not already
702 collecting backtracking points. for other cases,
703 use the MAX_REPEAT operator */
704
705 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
706
707 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
708 ctx->pattern[1], ctx->pattern[2]));
709
710 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
711 RETURN_FAILURE; /* cannot match */
712
713 state->ptr = ctx->ptr;
714
715 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
716 RETURN_ON_ERROR(ret);
717 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
718 ctx->count = ret;
719 ctx->ptr += ctx->count;
720
721 /* when we arrive here, count contains the number of
722 matches, and ctx->ptr points to the tail of the target
723 string. check if the rest of the pattern matches,
724 and backtrack if not. */
725
726 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
727 RETURN_FAILURE;
728
729 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
730 /* tail is empty. we're finished */
731 state->ptr = ctx->ptr;
732 RETURN_SUCCESS;
733 }
734
735 LASTMARK_SAVE();
736
737 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
738 /* tail starts with a literal. skip positions where
739 the rest of the pattern cannot possibly match */
740 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
741 for (;;) {
742 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
743 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
744 ctx->ptr--;
745 ctx->count--;
746 }
747 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
748 break;
749 state->ptr = ctx->ptr;
750 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
751 ctx->pattern+ctx->pattern[0]);
752 if (ret) {
753 RETURN_ON_ERROR(ret);
754 RETURN_SUCCESS;
755 }
756
757 LASTMARK_RESTORE();
758
759 ctx->ptr--;
760 ctx->count--;
761 }
762
763 } else {
764 /* general case */
765 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
766 state->ptr = ctx->ptr;
767 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
768 ctx->pattern+ctx->pattern[0]);
769 if (ret) {
770 RETURN_ON_ERROR(ret);
771 RETURN_SUCCESS;
772 }
773 ctx->ptr--;
774 ctx->count--;
775 LASTMARK_RESTORE();
776 }
777 }
778 RETURN_FAILURE;
779
780 case SRE_OP_MIN_REPEAT_ONE:
781 /* match repeated sequence (minimizing regexp) */
782
783 /* this operator only works if the repeated item is
784 exactly one character wide, and we're not already
785 collecting backtracking points. for other cases,
786 use the MIN_REPEAT operator */
787
788 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
789
790 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
791 ctx->pattern[1], ctx->pattern[2]));
792
793 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
794 RETURN_FAILURE; /* cannot match */
795
796 state->ptr = ctx->ptr;
797
798 if (ctx->pattern[1] == 0)
799 ctx->count = 0;
800 else {
801 /* count using pattern min as the maximum */
802 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
803 RETURN_ON_ERROR(ret);
804 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
805 if (ret < (Py_ssize_t) ctx->pattern[1])
806 /* didn't match minimum number of times */
807 RETURN_FAILURE;
808 /* advance past minimum matches of repeat */
809 ctx->count = ret;
810 ctx->ptr += ctx->count;
811 }
812
813 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
814 /* tail is empty. we're finished */
815 state->ptr = ctx->ptr;
816 RETURN_SUCCESS;
817
818 } else {
819 /* general case */
820 LASTMARK_SAVE();
821 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
822 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
823 state->ptr = ctx->ptr;
824 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
825 ctx->pattern+ctx->pattern[0]);
826 if (ret) {
827 RETURN_ON_ERROR(ret);
828 RETURN_SUCCESS;
829 }
830 state->ptr = ctx->ptr;
831 ret = SRE(count)(state, ctx->pattern+3, 1);
832 RETURN_ON_ERROR(ret);
833 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
834 if (ret == 0)
835 break;
836 assert(ret == 1);
837 ctx->ptr++;
838 ctx->count++;
839 LASTMARK_RESTORE();
840 }
841 }
842 RETURN_FAILURE;
843
844 case SRE_OP_REPEAT:
845 /* create repeat context. all the hard work is done
846 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
847 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
848 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
849 ctx->pattern[1], ctx->pattern[2]));
850
851 /* install new repeat context */
852 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
853 if (!ctx->u.rep) {
854 PyErr_NoMemory();
855 RETURN_FAILURE;
856 }
857 ctx->u.rep->count = -1;
858 ctx->u.rep->pattern = ctx->pattern;
859 ctx->u.rep->prev = state->repeat;
860 ctx->u.rep->last_ptr = NULL;
861 state->repeat = ctx->u.rep;
862
863 state->ptr = ctx->ptr;
864 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
865 state->repeat = ctx->u.rep->prev;
866 PyObject_FREE(ctx->u.rep);
867
868 if (ret) {
869 RETURN_ON_ERROR(ret);
870 RETURN_SUCCESS;
871 }
872 RETURN_FAILURE;
873
874 case SRE_OP_MAX_UNTIL:
875 /* maximizing repeat */
876 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
877
878 /* FIXME: we probably need to deal with zero-width
879 matches in here... */
880
881 ctx->u.rep = state->repeat;
882 if (!ctx->u.rep)
883 RETURN_ERROR(SRE_ERROR_STATE);
884
885 state->ptr = ctx->ptr;
886
887 ctx->count = ctx->u.rep->count+1;
888
889 TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
890 ctx->ptr, ctx->count));
891
892 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
893 /* not enough matches */
894 ctx->u.rep->count = ctx->count;
895 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
896 ctx->u.rep->pattern+3);
897 if (ret) {
898 RETURN_ON_ERROR(ret);
899 RETURN_SUCCESS;
900 }
901 ctx->u.rep->count = ctx->count-1;
902 state->ptr = ctx->ptr;
903 RETURN_FAILURE;
904 }
905
906 if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
907 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
908 state->ptr != ctx->u.rep->last_ptr) {
909 /* we may have enough matches, but if we can
910 match another item, do so */
911 ctx->u.rep->count = ctx->count;
912 LASTMARK_SAVE();
913 MARK_PUSH(ctx->lastmark);
914 /* zero-width match protection */
915 DATA_PUSH(&ctx->u.rep->last_ptr);
916 ctx->u.rep->last_ptr = state->ptr;
917 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
918 ctx->u.rep->pattern+3);
919 DATA_POP(&ctx->u.rep->last_ptr);
920 if (ret) {
921 MARK_POP_DISCARD(ctx->lastmark);
922 RETURN_ON_ERROR(ret);
923 RETURN_SUCCESS;
924 }
925 MARK_POP(ctx->lastmark);
926 LASTMARK_RESTORE();
927 ctx->u.rep->count = ctx->count-1;
928 state->ptr = ctx->ptr;
929 }
930
931 /* cannot match more repeated items here. make sure the
932 tail matches */
933 state->repeat = ctx->u.rep->prev;
934 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
935 RETURN_ON_SUCCESS(ret);
936 state->repeat = ctx->u.rep;
937 state->ptr = ctx->ptr;
938 RETURN_FAILURE;
939
940 case SRE_OP_MIN_UNTIL:
941 /* minimizing repeat */
942 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
943
944 ctx->u.rep = state->repeat;
945 if (!ctx->u.rep)
946 RETURN_ERROR(SRE_ERROR_STATE);
947
948 state->ptr = ctx->ptr;
949
950 ctx->count = ctx->u.rep->count+1;
951
952 TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
953 ctx->ptr, ctx->count, ctx->u.rep->pattern));
954
955 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
956 /* not enough matches */
957 ctx->u.rep->count = ctx->count;
958 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
959 ctx->u.rep->pattern+3);
960 if (ret) {
961 RETURN_ON_ERROR(ret);
962 RETURN_SUCCESS;
963 }
964 ctx->u.rep->count = ctx->count-1;
965 state->ptr = ctx->ptr;
966 RETURN_FAILURE;
967 }
968
969 LASTMARK_SAVE();
970
971 /* see if the tail matches */
972 state->repeat = ctx->u.rep->prev;
973 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
974 if (ret) {
975 RETURN_ON_ERROR(ret);
976 RETURN_SUCCESS;
977 }
978
979 state->repeat = ctx->u.rep;
980 state->ptr = ctx->ptr;
981
982 LASTMARK_RESTORE();
983
984 if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
985 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
986 state->ptr == ctx->u.rep->last_ptr)
987 RETURN_FAILURE;
988
989 ctx->u.rep->count = ctx->count;
990 /* zero-width match protection */
991 DATA_PUSH(&ctx->u.rep->last_ptr);
992 ctx->u.rep->last_ptr = state->ptr;
993 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
994 ctx->u.rep->pattern+3);
995 DATA_POP(&ctx->u.rep->last_ptr);
996 if (ret) {
997 RETURN_ON_ERROR(ret);
998 RETURN_SUCCESS;
999 }
1000 ctx->u.rep->count = ctx->count-1;
1001 state->ptr = ctx->ptr;
1002 RETURN_FAILURE;
1003
1004 case SRE_OP_GROUPREF:
1005 /* match backreference */
1006 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1007 ctx->ptr, ctx->pattern[0]));
1008 i = ctx->pattern[0];
1009 {
1010 Py_ssize_t groupref = i+i;
1011 if (groupref >= state->lastmark) {
1012 RETURN_FAILURE;
1013 } else {
1014 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1015 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1016 if (!p || !e || e < p)
1017 RETURN_FAILURE;
1018 while (p < e) {
1019 if (ctx->ptr >= end || *ctx->ptr != *p)
1020 RETURN_FAILURE;
1021 p++;
1022 ctx->ptr++;
1023 }
1024 }
1025 }
1026 ctx->pattern++;
1027 break;
1028
1029 case SRE_OP_GROUPREF_IGNORE:
1030 /* match backreference */
1031 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1032 ctx->ptr, ctx->pattern[0]));
1033 i = ctx->pattern[0];
1034 {
1035 Py_ssize_t groupref = i+i;
1036 if (groupref >= state->lastmark) {
1037 RETURN_FAILURE;
1038 } else {
1039 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1040 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1041 if (!p || !e || e < p)
1042 RETURN_FAILURE;
1043 while (p < e) {
1044 if (ctx->ptr >= end ||
1045 state->lower(*ctx->ptr) != state->lower(*p))
1046 RETURN_FAILURE;
1047 p++;
1048 ctx->ptr++;
1049 }
1050 }
1051 }
1052 ctx->pattern++;
1053 break;
1054
1055 case SRE_OP_GROUPREF_EXISTS:
1056 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1057 ctx->ptr, ctx->pattern[0]));
1058 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1059 i = ctx->pattern[0];
1060 {
1061 Py_ssize_t groupref = i+i;
1062 if (groupref >= state->lastmark) {
1063 ctx->pattern += ctx->pattern[1];
1064 break;
1065 } else {
1066 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1067 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1068 if (!p || !e || e < p) {
1069 ctx->pattern += ctx->pattern[1];
1070 break;
1071 }
1072 }
1073 }
1074 ctx->pattern += 2;
1075 break;
1076
1077 case SRE_OP_ASSERT:
1078 /* assert subpattern */
1079 /* <ASSERT> <skip> <back> <pattern> */
1080 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1081 ctx->ptr, ctx->pattern[1]));
1082 state->ptr = ctx->ptr - ctx->pattern[1];
1083 if (state->ptr < state->beginning)
1084 RETURN_FAILURE;
1085 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1086 RETURN_ON_FAILURE(ret);
1087 ctx->pattern += ctx->pattern[0];
1088 break;
1089
1090 case SRE_OP_ASSERT_NOT:
1091 /* assert not subpattern */
1092 /* <ASSERT_NOT> <skip> <back> <pattern> */
1093 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1094 ctx->ptr, ctx->pattern[1]));
1095 state->ptr = ctx->ptr - ctx->pattern[1];
1096 if (state->ptr >= state->beginning) {
1097 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1098 if (ret) {
1099 RETURN_ON_ERROR(ret);
1100 RETURN_FAILURE;
1101 }
1102 }
1103 ctx->pattern += ctx->pattern[0];
1104 break;
1105
1106 case SRE_OP_FAILURE:
1107 /* immediate failure */
1108 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1109 RETURN_FAILURE;
1110
1111 default:
1112 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1113 ctx->pattern[-1]));
1114 RETURN_ERROR(SRE_ERROR_ILLEGAL);
1115 }
1116 }
1117
1118exit:
1119 ctx_pos = ctx->last_ctx_pos;
1120 jump = ctx->jump;
1121 DATA_POP_DISCARD(ctx);
1122 if (ctx_pos == -1)
1123 return ret;
1124 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1125
1126 switch (jump) {
1127 case JUMP_MAX_UNTIL_2:
1128 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1129 goto jump_max_until_2;
1130 case JUMP_MAX_UNTIL_3:
1131 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1132 goto jump_max_until_3;
1133 case JUMP_MIN_UNTIL_2:
1134 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1135 goto jump_min_until_2;
1136 case JUMP_MIN_UNTIL_3:
1137 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1138 goto jump_min_until_3;
1139 case JUMP_BRANCH:
1140 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1141 goto jump_branch;
1142 case JUMP_MAX_UNTIL_1:
1143 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1144 goto jump_max_until_1;
1145 case JUMP_MIN_UNTIL_1:
1146 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1147 goto jump_min_until_1;
1148 case JUMP_REPEAT:
1149 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1150 goto jump_repeat;
1151 case JUMP_REPEAT_ONE_1:
1152 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1153 goto jump_repeat_one_1;
1154 case JUMP_REPEAT_ONE_2:
1155 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1156 goto jump_repeat_one_2;
1157 case JUMP_MIN_REPEAT_ONE:
1158 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1159 goto jump_min_repeat_one;
1160 case JUMP_ASSERT:
1161 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1162 goto jump_assert;
1163 case JUMP_ASSERT_NOT:
1164 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1165 goto jump_assert_not;
1166 case JUMP_NONE:
1167 TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1168 ctx->ptr, ret));
1169 break;
1170 }
1171
1172 return ret; /* should never get here */
1173}
1174
1175LOCAL(Py_ssize_t)
1176SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1177{
1178 SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1179 SRE_CHAR* end = (SRE_CHAR *)state->end;
1180 Py_ssize_t status = 0;
1181 Py_ssize_t prefix_len = 0;
1182 Py_ssize_t prefix_skip = 0;
1183 SRE_CODE* prefix = NULL;
1184 SRE_CODE* charset = NULL;
1185 SRE_CODE* overlap = NULL;
1186 int flags = 0;
1187
1188 if (pattern[0] == SRE_OP_INFO) {
1189 /* optimization info block */
1190 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1191
1192 flags = pattern[2];
1193
1194 if (pattern[3] > 1) {
1195 /* adjust end point (but make sure we leave at least one
1196 character in there, so literal search will work) */
1197 end -= pattern[3] - 1;
1198 if (end <= ptr)
1199 end = ptr;
1200 }
1201
1202 if (flags & SRE_INFO_PREFIX) {
1203 /* pattern starts with a known prefix */
1204 /* <length> <skip> <prefix data> <overlap data> */
1205 prefix_len = pattern[5];
1206 prefix_skip = pattern[6];
1207 prefix = pattern + 7;
1208 overlap = prefix + prefix_len - 1;
1209 } else if (flags & SRE_INFO_CHARSET)
1210 /* pattern starts with a character from a known set */
1211 /* <charset> */
1212 charset = pattern + 5;
1213
1214 pattern += 1 + pattern[1];
1215 }
1216
1217 TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1218 prefix, prefix_len, prefix_skip));
1219 TRACE(("charset = %p\n", charset));
1220
1221#if defined(USE_FAST_SEARCH)
1222 if (prefix_len > 1) {
1223 /* pattern starts with a known prefix. use the overlap
1224 table to skip forward as fast as we possibly can */
1225 Py_ssize_t i = 0;
1226
1227 end = (SRE_CHAR *)state->end;
1228 if (prefix_len > end - ptr)
1229 return 0;
1230#if SIZEOF_SRE_CHAR < 4
1231 for (i = 0; i < prefix_len; i++)
1232 if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1233 return 0; /* literal can't match: doesn't fit in char width */
1234#endif
1235 while (ptr < end) {
1236 SRE_CHAR c = (SRE_CHAR) prefix[0];
1237 while (*ptr++ != c) {
1238 if (ptr >= end)
1239 return 0;
1240 }
1241 if (ptr >= end)
1242 return 0;
1243
1244 i = 1;
1245 do {
1246 if (*ptr == (SRE_CHAR) prefix[i]) {
1247 if (++i != prefix_len) {
1248 if (++ptr >= end)
1249 return 0;
1250 continue;
1251 }
1252 /* found a potential match */
1253 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1254 state->start = ptr - (prefix_len - 1);
1255 state->ptr = ptr - (prefix_len - prefix_skip - 1);
1256 if (flags & SRE_INFO_LITERAL)
1257 return 1; /* we got all of it */
1258 status = SRE(match)(state, pattern + 2*prefix_skip);
1259 if (status != 0)
1260 return status;
1261 /* close but no cigar -- try again */
1262 if (++ptr >= end)
1263 return 0;
1264 }
1265 i = overlap[i];
1266 } while (i != 0);
1267 }
1268 return 0;
1269 }
1270#endif
1271
1272 if (pattern[0] == SRE_OP_LITERAL) {
1273 /* pattern starts with a literal character. this is used
1274 for short prefixes, and if fast search is disabled */
1275 SRE_CHAR c = (SRE_CHAR) pattern[1];
1276#if SIZEOF_SRE_CHAR < 4
1277 if ((SRE_CODE) c != pattern[1])
1278 return 0; /* literal can't match: doesn't fit in char width */
1279#endif
1280 end = (SRE_CHAR *)state->end;
1281 while (ptr < end) {
1282 while (*ptr != c) {
1283 if (++ptr >= end)
1284 return 0;
1285 }
1286 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1287 state->start = ptr;
1288 state->ptr = ++ptr;
1289 if (flags & SRE_INFO_LITERAL)
1290 return 1; /* we got all of it */
1291 status = SRE(match)(state, pattern + 2);
1292 if (status != 0)
1293 break;
1294 }
1295 } else if (charset) {
1296 /* pattern starts with a character from a known set */
1297 end = (SRE_CHAR *)state->end;
1298 for (;;) {
1299 while (ptr < end && !SRE(charset)(charset, *ptr))
1300 ptr++;
1301 if (ptr >= end)
1302 return 0;
1303 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1304 state->start = ptr;
1305 state->ptr = ptr;
1306 status = SRE(match)(state, pattern);
1307 if (status != 0)
1308 break;
1309 ptr++;
1310 }
1311 } else
1312 /* general case */
1313 while (ptr <= end) {
1314 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1315 state->start = state->ptr = ptr++;
1316 status = SRE(match)(state, pattern);
1317 if (status != 0)
1318 break;
1319 }
1320
1321 return status;
1322}
1323
1324#undef SRE_CHAR
1325#undef SIZEOF_SRE_CHAR
1326#undef SRE
1327
1328/* vim:ts=4:sw=4:et
1329*/