blob: 6574720b609f4ca96c5dbf91fff6a73c3011416e [file] [log] [blame]
Thomas Wouters477c8d52006-05-27 19:21:47 +00001/* stringlib: fastsearch implementation */
2
Thomas Wouters477c8d52006-05-27 19:21:47 +00003#define STRINGLIB_FASTSEARCH_H
4
5/* fast search/count implementation, based on a mix between boyer-
6 moore and horspool, with a few more bells and whistles on the top.
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +00007 for some more background, see: http://effbot.org/zone/stringlib.htm */
Thomas Wouters477c8d52006-05-27 19:21:47 +00008
9/* note: fastsearch may access s[n], which isn't a problem when using
10 Python's ordinary string types, but may cause problems if you're
11 using this code in other contexts. also, the count mode returns -1
Dennis Sweeney73a85c42021-02-28 13:20:50 -050012 if there cannot possibly be a match in the target string, and 0 if
Thomas Wouters477c8d52006-05-27 19:21:47 +000013 it has actually checked for matches, but didn't find any. callers
14 beware! */
15
Dennis Sweeney73a85c42021-02-28 13:20:50 -050016/* If the strings are long enough, use Crochemore and Perrin's Two-Way
17 algorithm, which has worst-case O(n) runtime and best-case O(n/k).
18 Also compute a table of shifts to achieve O(n/k) in more cases,
19 and often (data dependent) deduce larger shifts than pure C&P can
20 deduce. */
21
Thomas Wouters477c8d52006-05-27 19:21:47 +000022#define FAST_COUNT 0
23#define FAST_SEARCH 1
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +000024#define FAST_RSEARCH 2
Thomas Wouters477c8d52006-05-27 19:21:47 +000025
Antoine Pitrouf068f942010-01-13 14:19:12 +000026#if LONG_BIT >= 128
27#define STRINGLIB_BLOOM_WIDTH 128
28#elif LONG_BIT >= 64
29#define STRINGLIB_BLOOM_WIDTH 64
30#elif LONG_BIT >= 32
31#define STRINGLIB_BLOOM_WIDTH 32
32#else
33#error "LONG_BIT is smaller than 32"
34#endif
35
36#define STRINGLIB_BLOOM_ADD(mask, ch) \
37 ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
38#define STRINGLIB_BLOOM(mask, ch) \
39 ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
Antoine Pitrouf2c54842010-01-13 08:07:53 +000040
Serhiy Storchaka0a58f722017-03-30 09:11:10 +030041#if STRINGLIB_SIZEOF_CHAR == 1
42# define MEMCHR_CUT_OFF 15
43#else
44# define MEMCHR_CUT_OFF 40
45#endif
46
Serhiy Storchaka413fdce2015-11-14 15:42:17 +020047Py_LOCAL_INLINE(Py_ssize_t)
48STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
49{
50 const STRINGLIB_CHAR *p, *e;
51
52 p = s;
53 e = s + n;
Serhiy Storchaka0a58f722017-03-30 09:11:10 +030054 if (n > MEMCHR_CUT_OFF) {
Serhiy Storchaka413fdce2015-11-14 15:42:17 +020055#if STRINGLIB_SIZEOF_CHAR == 1
56 p = memchr(s, ch, n);
57 if (p != NULL)
58 return (p - s);
59 return -1;
60#else
Valentin Haenel60bba832019-09-11 14:43:29 +020061 /* use memchr if we can choose a needle without too many likely
Serhiy Storchaka413fdce2015-11-14 15:42:17 +020062 false positives */
Serhiy Storchaka0a58f722017-03-30 09:11:10 +030063 const STRINGLIB_CHAR *s1, *e1;
Serhiy Storchaka413fdce2015-11-14 15:42:17 +020064 unsigned char needle = ch & 0xff;
65 /* If looking for a multiple of 256, we'd have too
66 many false positives looking for the '\0' byte in UCS2
67 and UCS4 representations. */
68 if (needle != 0) {
Serhiy Storchaka0a58f722017-03-30 09:11:10 +030069 do {
Serhiy Storchaka413fdce2015-11-14 15:42:17 +020070 void *candidate = memchr(p, needle,
71 (e - p) * sizeof(STRINGLIB_CHAR));
72 if (candidate == NULL)
73 return -1;
Serhiy Storchaka0a58f722017-03-30 09:11:10 +030074 s1 = p;
Serhiy Storchaka413fdce2015-11-14 15:42:17 +020075 p = (const STRINGLIB_CHAR *)
76 _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
77 if (*p == ch)
78 return (p - s);
79 /* False positive */
80 p++;
Serhiy Storchaka0a58f722017-03-30 09:11:10 +030081 if (p - s1 > MEMCHR_CUT_OFF)
82 continue;
83 if (e - p <= MEMCHR_CUT_OFF)
84 break;
85 e1 = p + MEMCHR_CUT_OFF;
86 while (p != e1) {
87 if (*p == ch)
88 return (p - s);
89 p++;
90 }
Serhiy Storchaka413fdce2015-11-14 15:42:17 +020091 }
Serhiy Storchaka0a58f722017-03-30 09:11:10 +030092 while (e - p > MEMCHR_CUT_OFF);
Serhiy Storchaka413fdce2015-11-14 15:42:17 +020093 }
94#endif
95 }
96 while (p < e) {
97 if (*p == ch)
98 return (p - s);
99 p++;
100 }
101 return -1;
102}
Antoine Pitrou2c3b2302011-10-11 20:29:21 +0200103
104Py_LOCAL_INLINE(Py_ssize_t)
Serhiy Storchaka413fdce2015-11-14 15:42:17 +0200105STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
Antoine Pitrou2c3b2302011-10-11 20:29:21 +0200106{
Serhiy Storchaka413fdce2015-11-14 15:42:17 +0200107 const STRINGLIB_CHAR *p;
Antoine Pitrou2c3b2302011-10-11 20:29:21 +0200108#ifdef HAVE_MEMRCHR
109 /* memrchr() is a GNU extension, available since glibc 2.1.91.
110 it doesn't seem as optimized as memchr(), but is still quite
Serhiy Storchaka413fdce2015-11-14 15:42:17 +0200111 faster than our hand-written loop below */
Antoine Pitrou2c3b2302011-10-11 20:29:21 +0200112
Serhiy Storchaka0a58f722017-03-30 09:11:10 +0300113 if (n > MEMCHR_CUT_OFF) {
Serhiy Storchaka413fdce2015-11-14 15:42:17 +0200114#if STRINGLIB_SIZEOF_CHAR == 1
115 p = memrchr(s, ch, n);
116 if (p != NULL)
117 return (p - s);
118 return -1;
119#else
Valentin Haenel60bba832019-09-11 14:43:29 +0200120 /* use memrchr if we can choose a needle without too many likely
Serhiy Storchaka413fdce2015-11-14 15:42:17 +0200121 false positives */
Serhiy Storchaka0a58f722017-03-30 09:11:10 +0300122 const STRINGLIB_CHAR *s1;
123 Py_ssize_t n1;
Serhiy Storchaka413fdce2015-11-14 15:42:17 +0200124 unsigned char needle = ch & 0xff;
125 /* If looking for a multiple of 256, we'd have too
126 many false positives looking for the '\0' byte in UCS2
127 and UCS4 representations. */
128 if (needle != 0) {
Serhiy Storchaka0a58f722017-03-30 09:11:10 +0300129 do {
Serhiy Storchaka413fdce2015-11-14 15:42:17 +0200130 void *candidate = memrchr(s, needle,
131 n * sizeof(STRINGLIB_CHAR));
132 if (candidate == NULL)
133 return -1;
Serhiy Storchaka0a58f722017-03-30 09:11:10 +0300134 n1 = n;
Serhiy Storchaka413fdce2015-11-14 15:42:17 +0200135 p = (const STRINGLIB_CHAR *)
136 _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
137 n = p - s;
138 if (*p == ch)
139 return n;
140 /* False positive */
Serhiy Storchaka0a58f722017-03-30 09:11:10 +0300141 if (n1 - n > MEMCHR_CUT_OFF)
142 continue;
143 if (n <= MEMCHR_CUT_OFF)
144 break;
145 s1 = p - MEMCHR_CUT_OFF;
146 while (p > s1) {
147 p--;
148 if (*p == ch)
149 return (p - s);
150 }
151 n = p - s;
Serhiy Storchaka413fdce2015-11-14 15:42:17 +0200152 }
Serhiy Storchaka0a58f722017-03-30 09:11:10 +0300153 while (n > MEMCHR_CUT_OFF);
Serhiy Storchaka413fdce2015-11-14 15:42:17 +0200154 }
155#endif
156 }
157#endif /* HAVE_MEMRCHR */
158 p = s + n;
159 while (p > s) {
160 p--;
161 if (*p == ch)
162 return (p - s);
163 }
164 return -1;
Antoine Pitrou2c3b2302011-10-11 20:29:21 +0200165}
166
Serhiy Storchaka0a58f722017-03-30 09:11:10 +0300167#undef MEMCHR_CUT_OFF
168
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500169/* Change to a 1 to see logging comments walk through the algorithm. */
170#if 0 && STRINGLIB_SIZEOF_CHAR == 1
171# define LOG(...) printf(__VA_ARGS__)
172# define LOG_STRING(s, n) printf("\"%.*s\"", n, s)
173#else
174# define LOG(...)
175# define LOG_STRING(s, n)
176#endif
177
178Py_LOCAL_INLINE(Py_ssize_t)
179STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
180 Py_ssize_t *return_period, int invert_alphabet)
181{
182 /* Do a lexicographic search. Essentially this:
183 >>> max(needle[i:] for i in range(len(needle)+1))
184 Also find the period of the right half. */
185 Py_ssize_t max_suffix = 0;
186 Py_ssize_t candidate = 1;
187 Py_ssize_t k = 0;
188 // The period of the right half.
189 Py_ssize_t period = 1;
190
191 while (candidate + k < len_needle) {
192 // each loop increases candidate + k + max_suffix
193 STRINGLIB_CHAR a = needle[candidate + k];
194 STRINGLIB_CHAR b = needle[max_suffix + k];
195 // check if the suffix at candidate is better than max_suffix
196 if (invert_alphabet ? (b < a) : (a < b)) {
197 // Fell short of max_suffix.
198 // The next k + 1 characters are non-increasing
199 // from candidate, so they won't start a maximal suffix.
200 candidate += k + 1;
201 k = 0;
202 // We've ruled out any period smaller than what's
203 // been scanned since max_suffix.
204 period = candidate - max_suffix;
205 }
206 else if (a == b) {
207 if (k + 1 != period) {
208 // Keep scanning the equal strings
209 k++;
210 }
211 else {
212 // Matched a whole period.
213 // Start matching the next period.
214 candidate += period;
215 k = 0;
216 }
217 }
218 else {
219 // Did better than max_suffix, so replace it.
220 max_suffix = candidate;
221 candidate++;
222 k = 0;
223 period = 1;
224 }
225 }
226 *return_period = period;
227 return max_suffix;
228}
229
230Py_LOCAL_INLINE(Py_ssize_t)
231STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
232 Py_ssize_t len_needle,
233 Py_ssize_t *return_period)
234{
235 /* Do a "critical factorization", making it so that:
236 >>> needle = (left := needle[:cut]) + (right := needle[cut:])
237 where the "local period" of the cut is maximal.
238
239 The local period of the cut is the minimal length of a string w
240 such that (left endswith w or w endswith left)
241 and (right startswith w or w startswith left).
242
243 The Critical Factorization Theorem says that this maximal local
244 period is the global period of the string.
245
246 Crochemore and Perrin (1991) show that this cut can be computed
247 as the later of two cuts: one that gives a lexicographically
248 maximal right half, and one that gives the same with the
249 with respect to a reversed alphabet-ordering.
250
251 This is what we want to happen:
252 >>> x = "GCAGAGAG"
253 >>> cut, period = factorize(x)
254 >>> x[:cut], (right := x[cut:])
255 ('GC', 'AGAGAG')
256 >>> period # right half period
257 2
258 >>> right[period:] == right[:-period]
259 True
260
261 This is how the local period lines up in the above example:
262 GC | AGAGAG
263 AGAGAGC = AGAGAGC
264 The length of this minimal repetition is 7, which is indeed the
265 period of the original string. */
266
267 Py_ssize_t cut1, period1, cut2, period2, cut, period;
268 cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
269 cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
270
271 // Take the later cut.
272 if (cut1 > cut2) {
273 period = period1;
274 cut = cut1;
275 }
276 else {
277 period = period2;
278 cut = cut2;
279 }
280
281 LOG("split: "); LOG_STRING(needle, cut);
282 LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
283 LOG("\n");
284
285 *return_period = period;
286 return cut;
287}
288
289#define SHIFT_TYPE uint8_t
290#define NOT_FOUND ((1U<<(8*sizeof(SHIFT_TYPE))) - 1U)
291#define SHIFT_OVERFLOW (NOT_FOUND - 1U)
292
293#define TABLE_SIZE_BITS 6
294#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
295#define TABLE_MASK (TABLE_SIZE - 1U)
296
297typedef struct STRINGLIB(_pre) {
298 const STRINGLIB_CHAR *needle;
299 Py_ssize_t len_needle;
300 Py_ssize_t cut;
301 Py_ssize_t period;
302 int is_periodic;
303 SHIFT_TYPE table[TABLE_SIZE];
304} STRINGLIB(prework);
305
306
307Py_LOCAL_INLINE(void)
308STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
309 STRINGLIB(prework) *p)
310{
311 p->needle = needle;
312 p->len_needle = len_needle;
313 p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
314 assert(p->period + p->cut <= len_needle);
315 p->is_periodic = (0 == memcmp(needle,
316 needle + p->period,
317 p->cut * STRINGLIB_SIZEOF_CHAR));
318 if (p->is_periodic) {
319 assert(p->cut <= len_needle/2);
320 assert(p->cut < p->period);
321 }
322 else {
323 // A lower bound on the period
324 p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
325 }
326 // Now fill up a table
327 memset(&(p->table[0]), 0xff, TABLE_SIZE*sizeof(SHIFT_TYPE));
328 assert(p->table[0] == NOT_FOUND);
329 assert(p->table[TABLE_MASK] == NOT_FOUND);
330 for (Py_ssize_t i = 0; i < len_needle; i++) {
331 Py_ssize_t shift = len_needle - i;
332 if (shift > SHIFT_OVERFLOW) {
333 shift = SHIFT_OVERFLOW;
334 }
335 p->table[needle[i] & TABLE_MASK] = Py_SAFE_DOWNCAST(shift,
336 Py_ssize_t,
337 SHIFT_TYPE);
338 }
339}
340
341Py_LOCAL_INLINE(Py_ssize_t)
342STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
343 STRINGLIB(prework) *p)
344{
345 // Crochemore and Perrin's (1991) Two-Way algorithm.
346 // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
347 Py_ssize_t len_needle = p->len_needle;
348 Py_ssize_t cut = p->cut;
349 Py_ssize_t period = p->period;
350 const STRINGLIB_CHAR *needle = p->needle;
351 const STRINGLIB_CHAR *window = haystack;
352 const STRINGLIB_CHAR *last_window = haystack + len_haystack - len_needle;
353 SHIFT_TYPE *table = p->table;
354 LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
355
356 if (p->is_periodic) {
357 LOG("Needle is periodic.\n");
358 Py_ssize_t memory = 0;
359 periodicwindowloop:
360 while (window <= last_window) {
361 Py_ssize_t i = Py_MAX(cut, memory);
362
363 // Visualize the line-up:
364 LOG("> "); LOG_STRING(haystack, len_haystack);
365 LOG("\n> "); LOG("%*s", window - haystack, "");
366 LOG_STRING(needle, len_needle);
367 LOG("\n> "); LOG("%*s", window - haystack + i, "");
368 LOG(" ^ <-- cut\n");
369
370 if (window[i] != needle[i]) {
371 // Sunday's trick: if we're going to jump, we might
372 // as well jump to line up the character *after* the
373 // current window.
374 STRINGLIB_CHAR first_outside = window[len_needle];
375 SHIFT_TYPE shift = table[first_outside & TABLE_MASK];
376 if (shift == NOT_FOUND) {
377 LOG("\"%c\" not found. Skipping entirely.\n",
378 first_outside);
379 window += len_needle + 1;
380 }
381 else {
382 LOG("Shifting to line up \"%c\".\n", first_outside);
383 Py_ssize_t memory_shift = i - cut + 1;
384 window += Py_MAX(shift, memory_shift);
385 }
386 memory = 0;
387 goto periodicwindowloop;
388 }
389 for (i = i + 1; i < len_needle; i++) {
390 if (needle[i] != window[i]) {
391 LOG("Right half does not match. Jump ahead by %d.\n",
392 i - cut + 1);
393 window += i - cut + 1;
394 memory = 0;
395 goto periodicwindowloop;
396 }
397 }
398 for (i = memory; i < cut; i++) {
399 if (needle[i] != window[i]) {
400 LOG("Left half does not match. Jump ahead by period %d.\n",
401 period);
402 window += period;
403 memory = len_needle - period;
404 goto periodicwindowloop;
405 }
406 }
407 LOG("Left half matches. Returning %d.\n",
408 window - haystack);
409 return window - haystack;
410 }
411 }
412 else {
413 LOG("Needle is not periodic.\n");
414 assert(cut < len_needle);
415 STRINGLIB_CHAR needle_cut = needle[cut];
416 windowloop:
417 while (window <= last_window) {
418
419 // Visualize the line-up:
420 LOG("> "); LOG_STRING(haystack, len_haystack);
421 LOG("\n> "); LOG("%*s", window - haystack, "");
422 LOG_STRING(needle, len_needle);
423 LOG("\n> "); LOG("%*s", window - haystack + cut, "");
424 LOG(" ^ <-- cut\n");
425
426 if (window[cut] != needle_cut) {
427 // Sunday's trick: if we're going to jump, we might
428 // as well jump to line up the character *after* the
429 // current window.
430 STRINGLIB_CHAR first_outside = window[len_needle];
431 SHIFT_TYPE shift = table[first_outside & TABLE_MASK];
432 if (shift == NOT_FOUND) {
433 LOG("\"%c\" not found. Skipping entirely.\n",
434 first_outside);
435 window += len_needle + 1;
436 }
437 else {
438 LOG("Shifting to line up \"%c\".\n", first_outside);
439 window += shift;
440 }
441 goto windowloop;
442 }
443 for (Py_ssize_t i = cut + 1; i < len_needle; i++) {
444 if (needle[i] != window[i]) {
445 LOG("Right half does not match. Advance by %d.\n",
446 i - cut + 1);
447 window += i - cut + 1;
448 goto windowloop;
449 }
450 }
451 for (Py_ssize_t i = 0; i < cut; i++) {
452 if (needle[i] != window[i]) {
453 LOG("Left half does not match. Advance by period %d.\n",
454 period);
455 window += period;
456 goto windowloop;
457 }
458 }
459 LOG("Left half matches. Returning %d.\n", window - haystack);
460 return window - haystack;
461 }
462 }
463 LOG("Not found. Returning -1.\n");
464 return -1;
465}
466
467Py_LOCAL_INLINE(Py_ssize_t)
468STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
469 Py_ssize_t len_haystack,
470 const STRINGLIB_CHAR *needle,
471 Py_ssize_t len_needle)
472{
473 LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
474 STRINGLIB(prework) p;
475 STRINGLIB(_preprocess)(needle, len_needle, &p);
476 return STRINGLIB(_two_way)(haystack, len_haystack, &p);
477}
478
479Py_LOCAL_INLINE(Py_ssize_t)
480STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
481 Py_ssize_t len_haystack,
482 const STRINGLIB_CHAR *needle,
483 Py_ssize_t len_needle,
484 Py_ssize_t maxcount)
485{
486 LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
487 STRINGLIB(prework) p;
488 STRINGLIB(_preprocess)(needle, len_needle, &p);
489 Py_ssize_t index = 0, count = 0;
490 while (1) {
491 Py_ssize_t result;
492 result = STRINGLIB(_two_way)(haystack + index,
493 len_haystack - index, &p);
494 if (result == -1) {
495 return count;
496 }
497 count++;
498 if (count == maxcount) {
499 return maxcount;
500 }
501 index += result + len_needle;
502 }
503 return count;
504}
505
506#undef SHIFT_TYPE
507#undef NOT_FOUND
508#undef SHIFT_OVERFLOW
509#undef TABLE_SIZE_BITS
510#undef TABLE_SIZE
511#undef TABLE_MASK
512
513#undef LOG
514#undef LOG_STRING
515
Thomas Wouters477c8d52006-05-27 19:21:47 +0000516Py_LOCAL_INLINE(Py_ssize_t)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200517FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
Thomas Wouters477c8d52006-05-27 19:21:47 +0000518 const STRINGLIB_CHAR* p, Py_ssize_t m,
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000519 Py_ssize_t maxcount, int mode)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000520{
Antoine Pitrouf068f942010-01-13 14:19:12 +0000521 unsigned long mask;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000522 Py_ssize_t skip, count = 0;
523 Py_ssize_t i, j, mlast, w;
524
525 w = n - m;
526
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000527 if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
Thomas Wouters477c8d52006-05-27 19:21:47 +0000528 return -1;
529
530 /* look for special cases */
531 if (m <= 1) {
532 if (m <= 0)
533 return -1;
534 /* use special case for 1-character strings */
Serhiy Storchaka413fdce2015-11-14 15:42:17 +0200535 if (mode == FAST_SEARCH)
536 return STRINGLIB(find_char)(s, n, p[0]);
537 else if (mode == FAST_RSEARCH)
538 return STRINGLIB(rfind_char)(s, n, p[0]);
539 else { /* FAST_COUNT */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000540 for (i = 0; i < n; i++)
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000541 if (s[i] == p[0]) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000542 count++;
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000543 if (count == maxcount)
544 return maxcount;
545 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000546 return count;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000547 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000548 }
549
550 mlast = m - 1;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500551 skip = mlast;
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000552 mask = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000553
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000554 if (mode != FAST_RSEARCH) {
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500555 if (m >= 100 && w >= 2000 && w / m >= 5) {
556 /* For larger problems where the needle isn't a huge
557 percentage of the size of the haystack, the relatively
558 expensive O(m) startup cost of the two-way algorithm
559 will surely pay off. */
560 if (mode == FAST_SEARCH) {
561 return STRINGLIB(_two_way_find)(s, n, p, m);
562 }
563 else {
564 return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
565 }
566 }
Victor Stinner7efa3b82013-04-08 00:26:43 +0200567 const STRINGLIB_CHAR *ss = s + m - 1;
568 const STRINGLIB_CHAR *pp = p + m - 1;
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000569
570 /* create compressed boyer-moore delta 1 table */
571
572 /* process pattern[:-1] */
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000573 for (i = 0; i < mlast; i++) {
Antoine Pitrouf068f942010-01-13 14:19:12 +0000574 STRINGLIB_BLOOM_ADD(mask, p[i]);
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500575 if (p[i] == p[mlast]) {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000576 skip = mlast - i - 1;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500577 }
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000578 }
579 /* process pattern[-1] outside the loop */
Antoine Pitrouf068f942010-01-13 14:19:12 +0000580 STRINGLIB_BLOOM_ADD(mask, p[mlast]);
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000581
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500582 if (m >= 100 && w >= 8000) {
583 /* To ensure that we have good worst-case behavior,
584 here's an adaptive version of the algorithm, where if
585 we match O(m) characters without any matches of the
586 entire needle, then we predict that the startup cost of
587 the two-way algorithm will probably be worth it. */
588 Py_ssize_t hits = 0;
589 for (i = 0; i <= w; i++) {
590 if (ss[i] == pp[0]) {
591 /* candidate match */
592 for (j = 0; j < mlast; j++) {
593 if (s[i+j] != p[j]) {
594 break;
595 }
596 }
597 if (j == mlast) {
598 /* got a match! */
599 if (mode != FAST_COUNT) {
600 return i;
601 }
602 count++;
603 if (count == maxcount) {
604 return maxcount;
605 }
606 i = i + mlast;
607 continue;
608 }
609 /* miss: check if next character is part of pattern */
610 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
611 i = i + m;
612 }
613 else {
614 i = i + skip;
615 }
616 hits += j + 1;
617 if (hits >= m / 4 && i < w - 1000) {
618 /* We've done O(m) fruitless comparisons
619 anyway, so spend the O(m) cost on the
620 setup for the two-way algorithm. */
621 Py_ssize_t res;
622 if (mode == FAST_COUNT) {
623 res = STRINGLIB(_two_way_count)(
624 s+i, n-i, p, m, maxcount-count);
625 return count + res;
626 }
627 else {
628 res = STRINGLIB(_two_way_find)(s+i, n-i, p, m);
629 if (res == -1) {
630 return -1;
631 }
632 return i + res;
633 }
634 }
635 }
636 else {
637 /* skip: check if next character is part of pattern */
638 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
639 i = i + m;
640 }
641 }
642 }
643 if (mode != FAST_COUNT) {
644 return -1;
645 }
646 return count;
647 }
648 /* The standard, non-adaptive version of the algorithm. */
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000649 for (i = 0; i <= w; i++) {
650 /* note: using mlast in the skip path slows things down on x86 */
Victor Stinner7efa3b82013-04-08 00:26:43 +0200651 if (ss[i] == pp[0]) {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000652 /* candidate match */
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500653 for (j = 0; j < mlast; j++) {
654 if (s[i+j] != p[j]) {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000655 break;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500656 }
657 }
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000658 if (j == mlast) {
659 /* got a match! */
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500660 if (mode != FAST_COUNT) {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000661 return i;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500662 }
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000663 count++;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500664 if (count == maxcount) {
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000665 return maxcount;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500666 }
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000667 i = i + mlast;
668 continue;
669 }
670 /* miss: check if next character is part of pattern */
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500671 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000672 i = i + m;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500673 }
674 else {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000675 i = i + skip;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500676 }
677 }
678 else {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000679 /* skip: check if next character is part of pattern */
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500680 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000681 i = i + m;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500682 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000683 }
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000684 }
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500685 }
686 else { /* FAST_RSEARCH */
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000687
688 /* create compressed boyer-moore delta 1 table */
689
690 /* process pattern[0] outside the loop */
Antoine Pitrouf068f942010-01-13 14:19:12 +0000691 STRINGLIB_BLOOM_ADD(mask, p[0]);
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000692 /* process pattern[:0:-1] */
693 for (i = mlast; i > 0; i--) {
Antoine Pitrouf068f942010-01-13 14:19:12 +0000694 STRINGLIB_BLOOM_ADD(mask, p[i]);
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500695 if (p[i] == p[0]) {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000696 skip = i - 1;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500697 }
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000698 }
699
700 for (i = w; i >= 0; i--) {
701 if (s[i] == p[0]) {
702 /* candidate match */
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500703 for (j = mlast; j > 0; j--) {
704 if (s[i+j] != p[j]) {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000705 break;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500706 }
707 }
708 if (j == 0) {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000709 /* got a match! */
710 return i;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500711 }
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000712 /* miss: check if previous character is part of pattern */
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500713 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000714 i = i - m;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500715 }
716 else {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000717 i = i - skip;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500718 }
719 }
720 else {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000721 /* skip: check if previous character is part of pattern */
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500722 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000723 i = i - m;
Dennis Sweeney73a85c42021-02-28 13:20:50 -0500724 }
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000725 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000726 }
727 }
728
729 if (mode != FAST_COUNT)
730 return -1;
731 return count;
732}
733