blob: 56a4467d353813e5db41c8c959e8d1639f9335d9 [file] [log] [blame]
/* stringlib: fastsearch implementation */
#define STRINGLIB_FASTSEARCH_H
/* fast search/count implementation, based on a mix between boyer-
moore and horspool, with a few more bells and whistles on the top.
for some more background, see: http://effbot.org/zone/stringlib.htm */
/* note: fastsearch may access s[n], which isn't a problem when using
Python's ordinary string types, but may cause problems if you're
using this code in other contexts. also, the count mode returns -1
if there cannot possible be a match in the target string, and 0 if
it has actually checked for matches, but didn't find any. callers
beware! */
#define FAST_COUNT 0
#define FAST_SEARCH 1
#define FAST_RSEARCH 2
#if LONG_BIT >= 128
#define STRINGLIB_BLOOM_WIDTH 128
#elif LONG_BIT >= 64
#define STRINGLIB_BLOOM_WIDTH 64
#elif LONG_BIT >= 32
#define STRINGLIB_BLOOM_WIDTH 32
#else
#error "LONG_BIT is smaller than 32"
#endif
#define STRINGLIB_BLOOM_ADD(mask, ch) \
((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
#define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
#if STRINGLIB_SIZEOF_CHAR == 1
# define MEMCHR_CUT_OFF 15
#else
# define MEMCHR_CUT_OFF 40
#endif
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
{
const STRINGLIB_CHAR *p, *e;
p = s;
e = s + n;
if (n > MEMCHR_CUT_OFF) {
#if STRINGLIB_SIZEOF_CHAR == 1
p = memchr(s, ch, n);
if (p != NULL)
return (p - s);
return -1;
#else
/* use memchr if we can choose a needle without too many likely
false positives */
const STRINGLIB_CHAR *s1, *e1;
unsigned char needle = ch & 0xff;
/* If looking for a multiple of 256, we'd have too
many false positives looking for the '\0' byte in UCS2
and UCS4 representations. */
if (needle != 0) {
do {
void *candidate = memchr(p, needle,
(e - p) * sizeof(STRINGLIB_CHAR));
if (candidate == NULL)
return -1;
s1 = p;
p = (const STRINGLIB_CHAR *)
_Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
if (*p == ch)
return (p - s);
/* False positive */
p++;
if (p - s1 > MEMCHR_CUT_OFF)
continue;
if (e - p <= MEMCHR_CUT_OFF)
break;
e1 = p + MEMCHR_CUT_OFF;
while (p != e1) {
if (*p == ch)
return (p - s);
p++;
}
}
while (e - p > MEMCHR_CUT_OFF);
}
#endif
}
while (p < e) {
if (*p == ch)
return (p - s);
p++;
}
return -1;
}
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
{
const STRINGLIB_CHAR *p;
#ifdef HAVE_MEMRCHR
/* memrchr() is a GNU extension, available since glibc 2.1.91.
it doesn't seem as optimized as memchr(), but is still quite
faster than our hand-written loop below */
if (n > MEMCHR_CUT_OFF) {
#if STRINGLIB_SIZEOF_CHAR == 1
p = memrchr(s, ch, n);
if (p != NULL)
return (p - s);
return -1;
#else
/* use memrchr if we can choose a needle without too many likely
false positives */
const STRINGLIB_CHAR *s1;
Py_ssize_t n1;
unsigned char needle = ch & 0xff;
/* If looking for a multiple of 256, we'd have too
many false positives looking for the '\0' byte in UCS2
and UCS4 representations. */
if (needle != 0) {
do {
void *candidate = memrchr(s, needle,
n * sizeof(STRINGLIB_CHAR));
if (candidate == NULL)
return -1;
n1 = n;
p = (const STRINGLIB_CHAR *)
_Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
n = p - s;
if (*p == ch)
return n;
/* False positive */
if (n1 - n > MEMCHR_CUT_OFF)
continue;
if (n <= MEMCHR_CUT_OFF)
break;
s1 = p - MEMCHR_CUT_OFF;
while (p > s1) {
p--;
if (*p == ch)
return (p - s);
}
n = p - s;
}
while (n > MEMCHR_CUT_OFF);
}
#endif
}
#endif /* HAVE_MEMRCHR */
p = s + n;
while (p > s) {
p--;
if (*p == ch)
return (p - s);
}
return -1;
}
#undef MEMCHR_CUT_OFF
Py_LOCAL_INLINE(Py_ssize_t)
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
const STRINGLIB_CHAR* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
unsigned long mask;
Py_ssize_t skip, count = 0;
Py_ssize_t i, j, mlast, w;
w = n - m;
if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
return -1;
/* look for special cases */
if (m <= 1) {
if (m <= 0)
return -1;
/* use special case for 1-character strings */
if (mode == FAST_SEARCH)
return STRINGLIB(find_char)(s, n, p[0]);
else if (mode == FAST_RSEARCH)
return STRINGLIB(rfind_char)(s, n, p[0]);
else { /* FAST_COUNT */
for (i = 0; i < n; i++)
if (s[i] == p[0]) {
count++;
if (count == maxcount)
return maxcount;
}
return count;
}
}
mlast = m - 1;
skip = mlast - 1;
mask = 0;
if (mode != FAST_RSEARCH) {
const STRINGLIB_CHAR *ss = s + m - 1;
const STRINGLIB_CHAR *pp = p + m - 1;
/* create compressed boyer-moore delta 1 table */
/* process pattern[:-1] */
for (i = 0; i < mlast; i++) {
STRINGLIB_BLOOM_ADD(mask, p[i]);
if (p[i] == p[mlast])
skip = mlast - i - 1;
}
/* process pattern[-1] outside the loop */
STRINGLIB_BLOOM_ADD(mask, p[mlast]);
for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */
if (ss[i] == pp[0]) {
/* candidate match */
for (j = 0; j < mlast; j++)
if (s[i+j] != p[j])
break;
if (j == mlast) {
/* got a match! */
if (mode != FAST_COUNT)
return i;
count++;
if (count == maxcount)
return maxcount;
i = i + mlast;
continue;
}
/* miss: check if next character is part of pattern */
if (!STRINGLIB_BLOOM(mask, ss[i+1]))
i = i + m;
else
i = i + skip;
} else {
/* skip: check if next character is part of pattern */
if (!STRINGLIB_BLOOM(mask, ss[i+1]))
i = i + m;
}
}
} else { /* FAST_RSEARCH */
/* create compressed boyer-moore delta 1 table */
/* process pattern[0] outside the loop */
STRINGLIB_BLOOM_ADD(mask, p[0]);
/* process pattern[:0:-1] */
for (i = mlast; i > 0; i--) {
STRINGLIB_BLOOM_ADD(mask, p[i]);
if (p[i] == p[0])
skip = i - 1;
}
for (i = w; i >= 0; i--) {
if (s[i] == p[0]) {
/* candidate match */
for (j = mlast; j > 0; j--)
if (s[i+j] != p[j])
break;
if (j == 0)
/* got a match! */
return i;
/* miss: check if previous character is part of pattern */
if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
i = i - m;
else
i = i - skip;
} else {
/* skip: check if previous character is part of pattern */
if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
i = i - m;
}
}
}
if (mode != FAST_COUNT)
return -1;
return count;
}