Advance Long Index Lookup (+0.5% Speed)
This lookup can be advanced to before the short match check because its result is consumed either way: if the short check misses, it becomes the next loop iteration's `idxl0`/`matchl0`; if it hits, it is used immediately in `_search_next_long`. Issuing the load earlier gives it more time to complete before either consumer needs it.
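Here is a minimal compilable sketch of the resulting loop shape. Everything in it is a hypothetical stand-in (`TABLE_SIZE`, `hashL`/`hashS`, the single-byte "match checks", and the `hits` counter are simplifications for illustration, not the real zstd code), but the placement of the hoisted load mirrors the patch:

```c
#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-ins so the loop shape is self-contained; the real
 * tables, hash functions, and match checks live in zstd_double_fast.c. */
#define TABLE_SIZE 256u
static uint32_t hashLong[TABLE_SIZE];
static uint32_t hashSmall[TABLE_SIZE];
static size_t hashL(const uint8_t* p) { return p[0] % TABLE_SIZE; }
static size_t hashS(const uint8_t* p) { return p[0] % TABLE_SIZE; }

size_t sketch_loop(const uint8_t* base, const uint8_t* istart, const uint8_t* iend)
{
    const uint8_t* ip = istart;
    size_t hits = 0;
    size_t hl0, hl1;
    uint32_t idxl0, idxl1;
    const uint8_t* matchl0;
    const uint8_t* matchl1;

    if (ip + 1 >= iend) return 0;

    hl0 = hashL(ip);
    idxl0 = hashLong[hl0];   /* first long lookup, hoisted ahead of the loop */
    matchl0 = base + idxl0;

    while (ip + 1 < iend) {
        uint32_t const curr = (uint32_t)(ip - base);
        size_t const hs0 = hashS(ip);
        uint32_t const idxs0 = hashSmall[hs0];
        const uint8_t* const matchs0 = base + idxs0;
        hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */

        if (idxl0 > 0 && *matchl0 == *ip) {
            hits += 2;   /* stand-in for the prefix long match check at ip */
        }

        hl1 = hashL(ip + 1);
        /* The hoist: the next long-table load is issued before the short
         * match check. It stays below the table update above, so that
         * hl1 == hl0 still observes the just-written entry. */
        idxl1 = hashLong[hl1];
        matchl1 = base + idxl1;

        if (idxs0 > 0 && *matchs0 == *ip) {
            hits += 1;   /* short match: idxl1/matchl1 are consumed right
                          * away (the _search_next_long path) */
        }

        /* Either way, idxl1/matchl1 carry over as the next iteration's
         * idxl0/matchl0, so the early load is never wasted work. */
        hl0 = hl1;
        idxl0 = idxl1;
        matchl0 = matchl1;
        ip += 1;
    }
    return hits;
}
```

The actual change, against `lib/compress/zstd_double_fast.c`: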
diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
index 2e310e3..4ac4489 100644
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@@ -81,17 +81,14 @@
size_t hl0;
size_t hs0;
size_t hl1;
- // size_t hs1;

U32 idxl0;
U32 idxs0;
U32 idxl1;
- // U32 idxs1;

const BYTE* matchl0;
const BYTE* matchs0;
const BYTE* matchl1;
- // const BYTE* matchs1;

const BYTE* ip = istart;
const BYTE* ip1;
@@ -119,14 +116,14 @@
}

hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
+ idxl0 = hashLong[hl0];
+ matchl0 = base + idxl0;

/* Main Search Loop */
do {
curr = (U32)(ip-base);
hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
- idxl0 = hashLong[hl0];
idxs0 = hashSmall[hs0];
- matchl0 = base + idxl0;
matchs0 = base + idxs0;

hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */
@@ -151,6 +148,9 @@
}
}

+ idxl1 = hashLong[hl1];
+ matchl1 = base + idxl1;
+
if (idxs0 > prefixLowestIndex) {
/* check prefix short match */
if (MEM_read32(matchs0) == MEM_read32(ip)) {
@@ -168,6 +168,8 @@
ip1 += step;

hl0 = hl1;
+ idxl0 = idxl1;
+ matchl0 = matchl1;
#if defined(__aarch64__)
PREFETCH_L1(ip+256);
#endif
@@ -182,8 +184,7 @@
return (size_t)(iend - anchor);

_search_next_long:

- { idxl1 = hashLong[hl1];
- matchl1 = base + idxl1;
+ {
/* check prefix long +1 match */
if (idxl1 > prefixLowestIndex) {
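Note that the hoisted load still sits below the `hashLong[hl0] = hashSmall[hs0] = curr` update, so an `hl1` that collides with `hl0` observes the freshly written entry, exactly as the old load inside `_search_next_long` did. The transformation should be behavior-preserving; only the point at which the load is issued moves earlier.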