blob: 23bccfb01d0ede0a5f15592ed2d14687db787eb7 [file] [log] [blame]
Thomas Wouters477c8d52006-05-27 19:21:47 +00001/* stringlib: fastsearch implementation */
2
3#ifndef STRINGLIB_FASTSEARCH_H
4#define STRINGLIB_FASTSEARCH_H
5
6/* fast search/count implementation, based on a mix between boyer-
7 moore and horspool, with a few more bells and whistles on the top.
Benjamin Petersonaa069002009-01-23 03:26:36 +00008 for some more background, see: http://effbot.org/stringlib.htm */
Thomas Wouters477c8d52006-05-27 19:21:47 +00009
10/* note: fastsearch may access s[n], which isn't a problem when using
11 Python's ordinary string types, but may cause problems if you're
12 using this code in other contexts. also, the count mode returns -1
13 if there cannot possible be a match in the target string, and 0 if
14 it has actually checked for matches, but didn't find any. callers
15 beware! */
16
17#define FAST_COUNT 0
18#define FAST_SEARCH 1
19
20Py_LOCAL_INLINE(Py_ssize_t)
21fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
22 const STRINGLIB_CHAR* p, Py_ssize_t m,
23 int mode)
24{
25 long mask;
26 Py_ssize_t skip, count = 0;
27 Py_ssize_t i, j, mlast, w;
28
29 w = n - m;
30
31 if (w < 0)
32 return -1;
33
34 /* look for special cases */
35 if (m <= 1) {
36 if (m <= 0)
37 return -1;
38 /* use special case for 1-character strings */
39 if (mode == FAST_COUNT) {
40 for (i = 0; i < n; i++)
41 if (s[i] == p[0])
42 count++;
43 return count;
44 } else {
45 for (i = 0; i < n; i++)
46 if (s[i] == p[0])
47 return i;
48 }
49 return -1;
50 }
51
52 mlast = m - 1;
53
54 /* create compressed boyer-moore delta 1 table */
55 skip = mlast - 1;
56 /* process pattern[:-1] */
57 for (mask = i = 0; i < mlast; i++) {
58 mask |= (1 << (p[i] & 0x1F));
59 if (p[i] == p[mlast])
60 skip = mlast - i - 1;
61 }
62 /* process pattern[-1] outside the loop */
63 mask |= (1 << (p[mlast] & 0x1F));
64
65 for (i = 0; i <= w; i++) {
66 /* note: using mlast in the skip path slows things down on x86 */
67 if (s[i+m-1] == p[m-1]) {
68 /* candidate match */
69 for (j = 0; j < mlast; j++)
70 if (s[i+j] != p[j])
71 break;
72 if (j == mlast) {
73 /* got a match! */
74 if (mode != FAST_COUNT)
75 return i;
76 count++;
77 i = i + mlast;
78 continue;
79 }
80 /* miss: check if next character is part of pattern */
81 if (!(mask & (1 << (s[i+m] & 0x1F))))
82 i = i + m;
83 else
84 i = i + skip;
85 } else {
86 /* skip: check if next character is part of pattern */
87 if (!(mask & (1 << (s[i+m] & 0x1F))))
88 i = i + m;
89 }
90 }
91
92 if (mode != FAST_COUNT)
93 return -1;
94 return count;
95}
96
97#endif
98
99/*
100Local variables:
101c-basic-offset: 4
102indent-tabs-mode: nil
103End:
104*/