blob: 7b9dd47a5d3f99fce99821bc0aa9f0432a61ac1a [file] [log] [blame]
Fredrik Lundha50d2012006-05-26 17:04:58 +00001/* stringlib: fastsearch implementation */
2
3#ifndef STRINGLIB_FASTSEARCH_H
4#define STRINGLIB_FASTSEARCH_H
5
6/* fast search/count implementation, based on a mix between boyer-
7 moore and horspool, with a few more bells and whistles on the top.
Antoine Pitrou5b7139a2010-01-02 21:12:58 +00008 for some more background, see: http://effbot.org/zone/stringlib.htm */
Fredrik Lundha50d2012006-05-26 17:04:58 +00009
10/* note: fastsearch may access s[n], which isn't a problem when using
11 Python's ordinary string types, but may cause problems if you're
12 using this code in other contexts. also, the count mode returns -1
13 if there cannot possible be a match in the target string, and 0 if
14 it has actually checked for matches, but didn't find any. callers
15 beware! */
16
17#define FAST_COUNT 0
18#define FAST_SEARCH 1
Antoine Pitrou5b7139a2010-01-02 21:12:58 +000019#define FAST_RSEARCH 2
Fredrik Lundha50d2012006-05-26 17:04:58 +000020
Fredrik Lundhc2d29c52006-05-27 14:58:20 +000021Py_LOCAL_INLINE(Py_ssize_t)
Fredrik Lundha50d2012006-05-26 17:04:58 +000022fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
23 const STRINGLIB_CHAR* p, Py_ssize_t m,
24 int mode)
25{
26 long mask;
27 Py_ssize_t skip, count = 0;
28 Py_ssize_t i, j, mlast, w;
29
30 w = n - m;
31
32 if (w < 0)
33 return -1;
34
35 /* look for special cases */
36 if (m <= 1) {
37 if (m <= 0)
38 return -1;
39 /* use special case for 1-character strings */
40 if (mode == FAST_COUNT) {
41 for (i = 0; i < n; i++)
42 if (s[i] == p[0])
43 count++;
44 return count;
Antoine Pitrou5b7139a2010-01-02 21:12:58 +000045 } else if (mode == FAST_SEARCH) {
Fredrik Lundha50d2012006-05-26 17:04:58 +000046 for (i = 0; i < n; i++)
47 if (s[i] == p[0])
48 return i;
Antoine Pitrou5b7139a2010-01-02 21:12:58 +000049 } else { /* FAST_RSEARCH */
50 for (i = n - 1; i > -1; i--)
51 if (s[i] == p[0])
52 return i;
Fredrik Lundha50d2012006-05-26 17:04:58 +000053 }
54 return -1;
55 }
56
57 mlast = m - 1;
Fredrik Lundha50d2012006-05-26 17:04:58 +000058 skip = mlast - 1;
Fredrik Lundha50d2012006-05-26 17:04:58 +000059
Antoine Pitrou5b7139a2010-01-02 21:12:58 +000060 if (mode != FAST_RSEARCH) {
61
62 /* create compressed boyer-moore delta 1 table */
63
64 /* process pattern[:-1] */
65 for (mask = i = 0; i < mlast; i++) {
66 mask |= (1 << (p[i] & 0x1F));
67 if (p[i] == p[mlast])
68 skip = mlast - i - 1;
69 }
70 /* process pattern[-1] outside the loop */
71 mask |= (1 << (p[mlast] & 0x1F));
72
73 for (i = 0; i <= w; i++) {
74 /* note: using mlast in the skip path slows things down on x86 */
75 if (s[i+m-1] == p[m-1]) {
76 /* candidate match */
77 for (j = 0; j < mlast; j++)
78 if (s[i+j] != p[j])
79 break;
80 if (j == mlast) {
81 /* got a match! */
82 if (mode != FAST_COUNT)
83 return i;
84 count++;
85 i = i + mlast;
86 continue;
87 }
88 /* miss: check if next character is part of pattern */
89 if (!(mask & (1 << (s[i+m] & 0x1F))))
90 i = i + m;
91 else
92 i = i + skip;
93 } else {
94 /* skip: check if next character is part of pattern */
95 if (!(mask & (1 << (s[i+m] & 0x1F))))
96 i = i + m;
Fredrik Lundha50d2012006-05-26 17:04:58 +000097 }
Antoine Pitrou5b7139a2010-01-02 21:12:58 +000098 }
99 } else { /* FAST_RSEARCH */
100
101 /* create compressed boyer-moore delta 1 table */
102
103 /* process pattern[0] outside the loop */
104 mask = (1 << (p[0] & 0x1F));
105 /* process pattern[:0:-1] */
106 for (i = mlast; i > 0; i--) {
107 mask |= (1 << (p[i] & 0x1F));
108 if (p[i] == p[0])
109 skip = i - 1;
110 }
111
112 for (i = w; i >= 0; i--) {
113 if (s[i] == p[0]) {
114 /* candidate match */
115 for (j = mlast; j > 0; j--)
116 if (s[i+j] != p[j])
117 break;
118 if (j == 0)
119 /* got a match! */
120 return i;
121 /* miss: check if previous character is part of pattern */
122 if (!(mask & (1 << (s[i-1] & 0x1F))))
123 i = i - m;
124 else
125 i = i - skip;
126 } else {
127 /* skip: check if previous character is part of pattern */
128 if (!(mask & (1 << (s[i-1] & 0x1F))))
129 i = i - m;
130 }
Fredrik Lundha50d2012006-05-26 17:04:58 +0000131 }
132 }
133
134 if (mode != FAST_COUNT)
135 return -1;
136 return count;
137}
138
139#endif
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000140
141/*
142Local variables:
143c-basic-offset: 4
144indent-tabs-mode: nil
145End:
146*/