initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 1 | /* adler32.c -- compute the Adler-32 checksum of a data stream |
mark | 13dc246 | 2017-02-14 22:15:29 -0800 | [diff] [blame] | 2 | * Copyright (C) 1995-2011, 2016 Mark Adler |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 3 | * For conditions of distribution and use, see copyright notice in zlib.h |
| 4 | */ |
| 5 | |
hbono@chromium.org | d2dc209 | 2011-12-12 08:48:38 +0000 | [diff] [blame] | 6 | /* @(#) $Id$ */ |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 7 | |
hbono@chromium.org | d2dc209 | 2011-12-12 08:48:38 +0000 | [diff] [blame] | 8 | #include "zutil.h" |
| 9 | |
jiadong.zhu | 6c14216 | 2016-06-22 21:22:18 -0700 | [diff] [blame] | 10 | local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 11 | |
#define BASE 65521U     /* largest prime smaller than 65536 */
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */

/*
 * Unrolled accumulation helpers.  Each DOn adds n consecutive bytes of buf,
 * starting at index i, into the variables `adler` and `sum2`, which must be
 * in scope where the macro is expanded.  Every macro is a single
 * do { } while (0) statement so it can be used safely as the body of an
 * unbraced if/else.
 */
#define DO1(buf,i)  do { adler += (buf)[i]; sum2 += adler; } while (0)
#define DO2(buf,i)  do { DO1(buf,i); DO1(buf,(i)+1); } while (0)
#define DO4(buf,i)  do { DO2(buf,i); DO2(buf,(i)+2); } while (0)
#define DO8(buf,i)  do { DO4(buf,i); DO4(buf,(i)+4); } while (0)
#define DO16(buf)   do { DO8(buf,0); DO8(buf,8); } while (0)

/* use NO_DIVIDE if your processor does not do division in hardware --
   try it both ways to see which is faster */
#ifdef NO_DIVIDE
/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
   (thank you to John Reiser for pointing this out) */
/* CHOP: fold the bits above bit 15 back in as 15 * (a >> 16); the result is
   congruent to the old value modulo BASE but much smaller */
#  define CHOP(a) \
    do { \
        unsigned long chop_hi_ = (a) >> 16; \
        (a) &= 0xffffUL; \
        (a) += (chop_hi_ << 4) - chop_hi_; \
    } while (0)
/* MOD28: reduce a value below 2^28 -- one fold leaves it below 2*BASE, so a
   single conditional subtraction finishes the job */
#  define MOD28(a) \
    do { \
        CHOP(a); \
        if ((a) >= BASE) (a) -= BASE; \
    } while (0)
/* MOD: reduce a value below 2^32 -- needs one more fold than MOD28 */
#  define MOD(a) \
    do { \
        CHOP(a); \
        MOD28(a); \
    } while (0)
/* MOD63: reduce a non-negative 63-bit value; the first step folds the upper
   32 bits using 2^32 % BASE == 225 (225 == 256 - 32 + 1) */
#  define MOD63(a) \
    do { /* this assumes a is not negative */ \
        z_off64_t mod63_hi_ = (a) >> 32; \
        (a) &= 0xffffffffL; \
        (a) += (mod63_hi_ << 8) - (mod63_hi_ << 5) + mod63_hi_; \
        mod63_hi_ = (a) >> 16; \
        (a) &= 0xffffL; \
        (a) += (mod63_hi_ << 4) - mod63_hi_; \
        mod63_hi_ = (a) >> 16; \
        (a) &= 0xffffL; \
        (a) += (mod63_hi_ << 4) - mod63_hi_; \
        if ((a) >= BASE) (a) -= BASE; \
    } while (0)
#else
/* with hardware division all three reductions are a plain remainder */
#  define MOD(a) ((a) %= BASE)
#  define MOD28(a) ((a) %= BASE)
#  define MOD63(a) ((a) %= BASE)
#endif
| 61 | |
Adenilson Cavalcanti | 5de00af | 2020-01-08 22:12:31 +0000 | [diff] [blame] | 62 | #include "cpu_features.h" |
| 63 | #if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) |
Noel Gordon | 17bbb3d | 2017-09-29 19:44:25 +0000 | [diff] [blame] | 64 | #include "adler32_simd.h" |
| 65 | #endif |
| 66 | |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 67 | /* ========================================================================= */ |
mark | 13dc246 | 2017-02-14 22:15:29 -0800 | [diff] [blame] | 68 | uLong ZEXPORT adler32_z(adler, buf, len) |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 69 | uLong adler; |
| 70 | const Bytef *buf; |
mark | 13dc246 | 2017-02-14 22:15:29 -0800 | [diff] [blame] | 71 | z_size_t len; |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 72 | { |
| 73 | unsigned long sum2; |
| 74 | unsigned n; |
| 75 | |
Noel Gordon | 17bbb3d | 2017-09-29 19:44:25 +0000 | [diff] [blame] | 76 | #if defined(ADLER32_SIMD_SSSE3) |
| 77 | if (x86_cpu_enable_ssse3 && buf && len >= 64) |
| 78 | return adler32_simd_(adler, buf, len); |
| 79 | #elif defined(ADLER32_SIMD_NEON) |
| 80 | if (buf && len >= 64) |
| 81 | return adler32_simd_(adler, buf, len); |
| 82 | #endif |
| 83 | |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 84 | /* split Adler-32 into component sums */ |
| 85 | sum2 = (adler >> 16) & 0xffff; |
| 86 | adler &= 0xffff; |
| 87 | |
| 88 | /* in case user likes doing a byte at a time, keep it fast */ |
| 89 | if (len == 1) { |
| 90 | adler += buf[0]; |
| 91 | if (adler >= BASE) |
| 92 | adler -= BASE; |
| 93 | sum2 += adler; |
| 94 | if (sum2 >= BASE) |
| 95 | sum2 -= BASE; |
| 96 | return adler | (sum2 << 16); |
| 97 | } |
| 98 | |
Noel Gordon | 4676581 | 2018-02-07 09:33:01 +0000 | [diff] [blame] | 99 | #if defined(ADLER32_SIMD_SSSE3) |
| 100 | /* |
| 101 | * Use SSSE3 to compute the adler32. Since this routine can be |
| 102 | * freely used, check CPU features here. zlib convention is to |
| 103 | * call adler32(0, NULL, 0), before making calls to adler32(). |
| 104 | * So this is a good early (and infrequent) place to cache CPU |
| 105 | * features for those later, more interesting adler32() calls. |
| 106 | */ |
| 107 | if (buf == Z_NULL) { |
| 108 | if (!len) /* Assume user is calling adler32(0, NULL, 0); */ |
Adenilson Cavalcanti | 5de00af | 2020-01-08 22:12:31 +0000 | [diff] [blame] | 109 | cpu_check_features(); |
Noel Gordon | 4676581 | 2018-02-07 09:33:01 +0000 | [diff] [blame] | 110 | return 1L; |
| 111 | } |
| 112 | #else |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 113 | /* initial Adler-32 value (deferred check for len == 1 speed) */ |
| 114 | if (buf == Z_NULL) |
| 115 | return 1L; |
Noel Gordon | 4676581 | 2018-02-07 09:33:01 +0000 | [diff] [blame] | 116 | #endif |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 117 | |
| 118 | /* in case short lengths are provided, keep it somewhat fast */ |
| 119 | if (len < 16) { |
| 120 | while (len--) { |
| 121 | adler += *buf++; |
| 122 | sum2 += adler; |
| 123 | } |
| 124 | if (adler >= BASE) |
| 125 | adler -= BASE; |
jiadong.zhu | 6c14216 | 2016-06-22 21:22:18 -0700 | [diff] [blame] | 126 | MOD28(sum2); /* only added so many BASE's */ |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 127 | return adler | (sum2 << 16); |
| 128 | } |
| 129 | |
| 130 | /* do length NMAX blocks -- requires just one modulo operation */ |
| 131 | while (len >= NMAX) { |
| 132 | len -= NMAX; |
| 133 | n = NMAX / 16; /* NMAX is divisible by 16 */ |
| 134 | do { |
| 135 | DO16(buf); /* 16 sums unrolled */ |
| 136 | buf += 16; |
| 137 | } while (--n); |
| 138 | MOD(adler); |
| 139 | MOD(sum2); |
| 140 | } |
| 141 | |
| 142 | /* do remaining bytes (less than NMAX, still just one modulo) */ |
| 143 | if (len) { /* avoid modulos if none remaining */ |
| 144 | while (len >= 16) { |
| 145 | len -= 16; |
| 146 | DO16(buf); |
| 147 | buf += 16; |
| 148 | } |
| 149 | while (len--) { |
| 150 | adler += *buf++; |
| 151 | sum2 += adler; |
| 152 | } |
| 153 | MOD(adler); |
| 154 | MOD(sum2); |
| 155 | } |
| 156 | |
| 157 | /* return recombined sums */ |
| 158 | return adler | (sum2 << 16); |
| 159 | } |
| 160 | |
| 161 | /* ========================================================================= */ |
mark | 13dc246 | 2017-02-14 22:15:29 -0800 | [diff] [blame] | 162 | uLong ZEXPORT adler32(adler, buf, len) |
| 163 | uLong adler; |
| 164 | const Bytef *buf; |
| 165 | uInt len; |
| 166 | { |
| 167 | return adler32_z(adler, buf, len); |
| 168 | } |
| 169 | |
| 170 | /* ========================================================================= */ |
hbono@chromium.org | d2dc209 | 2011-12-12 08:48:38 +0000 | [diff] [blame] | 171 | local uLong adler32_combine_(adler1, adler2, len2) |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 172 | uLong adler1; |
| 173 | uLong adler2; |
hbono@chromium.org | d2dc209 | 2011-12-12 08:48:38 +0000 | [diff] [blame] | 174 | z_off64_t len2; |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 175 | { |
| 176 | unsigned long sum1; |
| 177 | unsigned long sum2; |
| 178 | unsigned rem; |
| 179 | |
jiadong.zhu | 6c14216 | 2016-06-22 21:22:18 -0700 | [diff] [blame] | 180 | /* for negative len, return invalid adler32 as a clue for debugging */ |
| 181 | if (len2 < 0) |
| 182 | return 0xffffffffUL; |
| 183 | |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 184 | /* the derivation of this formula is left as an exercise for the reader */ |
jiadong.zhu | 6c14216 | 2016-06-22 21:22:18 -0700 | [diff] [blame] | 185 | MOD63(len2); /* assumes len2 >= 0 */ |
| 186 | rem = (unsigned)len2; |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 187 | sum1 = adler1 & 0xffff; |
| 188 | sum2 = rem * sum1; |
| 189 | MOD(sum2); |
| 190 | sum1 += (adler2 & 0xffff) + BASE - 1; |
| 191 | sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; |
hbono@chromium.org | d2dc209 | 2011-12-12 08:48:38 +0000 | [diff] [blame] | 192 | if (sum1 >= BASE) sum1 -= BASE; |
| 193 | if (sum1 >= BASE) sum1 -= BASE; |
mark | 13dc246 | 2017-02-14 22:15:29 -0800 | [diff] [blame] | 194 | if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); |
hbono@chromium.org | d2dc209 | 2011-12-12 08:48:38 +0000 | [diff] [blame] | 195 | if (sum2 >= BASE) sum2 -= BASE; |
initial.commit | 3d533e0 | 2008-07-27 00:38:33 +0000 | [diff] [blame] | 196 | return sum1 | (sum2 << 16); |
| 197 | } |
hbono@chromium.org | d2dc209 | 2011-12-12 08:48:38 +0000 | [diff] [blame] | 198 | |
| 199 | /* ========================================================================= */ |
| 200 | uLong ZEXPORT adler32_combine(adler1, adler2, len2) |
| 201 | uLong adler1; |
| 202 | uLong adler2; |
| 203 | z_off_t len2; |
| 204 | { |
| 205 | return adler32_combine_(adler1, adler2, len2); |
| 206 | } |
| 207 | |
| 208 | uLong ZEXPORT adler32_combine64(adler1, adler2, len2) |
| 209 | uLong adler1; |
| 210 | uLong adler2; |
| 211 | z_off64_t len2; |
| 212 | { |
| 213 | return adler32_combine_(adler1, adler2, len2); |
| 214 | } |