/* adler32.c -- compute the Adler-32 checksum of a data stream
 * Copyright (C) 1995-2011, 2016 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

/* @(#) $Id$ */

hbono@chromium.orgd2dc2092011-12-12 08:48:38 +00008#include "zutil.h"
9
jiadong.zhu6c142162016-06-22 21:22:18 -070010local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
initial.commit3d533e02008-07-27 00:38:33 +000011
#define BASE 65521U     /* largest prime smaller than 65536 */
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */

/* DOn(buf,i) folds the n consecutive bytes buf[i] .. buf[i+n-1] into the two
   running sums.  The expansions refer to variables named `adler` and `sum2`,
   which must be in scope (and wide enough not to overflow) at the point of
   use.  Each expansion already ends in ';', so `DO16(buf);` is a sequence of
   statements, not a single one -- do not use it as an unbraced if/else body. */
#define DO1(buf,i)  {adler += (buf)[i]; sum2 += adler;}
#define DO2(buf,i)  DO1(buf,i); DO1(buf,(i)+1);
#define DO4(buf,i)  DO2(buf,i); DO2(buf,(i)+2);
#define DO8(buf,i)  DO4(buf,i); DO4(buf,(i)+4);
#define DO16(buf)   DO8(buf,0); DO8(buf,8);

/* Modular-reduction helpers.  Each macro reduces its argument IN PLACE, so
   the argument must be a modifiable lvalue and is evaluated more than once
   in the NO_DIVIDE variants -- pass a plain variable, nothing with side
   effects.

   use NO_DIVIDE if your processor does not do division in hardware --
   try it both ways to see which is faster */
#ifdef NO_DIVIDE
/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
   (thank you to John Reiser for pointing this out) */

/* CHOP: replace a by (a mod 2^16) + 15 * (a div 2^16), which is congruent to
   a modulo BASE but not necessarily below BASE yet. */
#  define CHOP(a) \
    do { \
        unsigned long tmp = (a) >> 16; \
        (a) &= 0xffffUL; \
        (a) += (tmp << 4) - tmp; \
    } while (0)
/* MOD28: one CHOP plus one conditional subtraction.  This is a complete
   reduction for a < 2^28, since then CHOP leaves at most
   65535 + 15 * 4095 = 126960, which is below 2 * BASE. */
#  define MOD28(a) \
    do { \
        CHOP(a); \
        if ((a) >= BASE) (a) -= BASE; \
    } while (0)
/* MOD: two CHOPs plus one conditional subtraction; a complete reduction for
   any value that fits in 32 bits. */
#  define MOD(a) \
    do { \
        CHOP(a); \
        MOD28(a); \
    } while (0)
/* MOD63: reduce a non-negative z_off64_t.  The first step folds the upper
   32 bits using 2^32 % BASE == 15 * 15 == 225 == 256 - 32 + 1; the remaining
   steps are two CHOP-style folds and a final conditional subtraction. */
#  define MOD63(a) \
    do { /* this assumes a is not negative */ \
        z_off64_t tmp = (a) >> 32; \
        (a) &= 0xffffffffL; \
        (a) += (tmp << 8) - (tmp << 5) + tmp; \
        tmp = (a) >> 16; \
        (a) &= 0xffffL; \
        (a) += (tmp << 4) - tmp; \
        tmp = (a) >> 16; \
        (a) &= 0xffffL; \
        (a) += (tmp << 4) - tmp; \
        if ((a) >= BASE) (a) -= BASE; \
    } while (0)
#else
/* With hardware division all three variants are a plain remainder. */
#  define MOD(a) ((a) %= BASE)
#  define MOD28(a) ((a) %= BASE)
#  define MOD63(a) ((a) %= BASE)
#endif

Adenilson Cavalcanti5de00af2020-01-08 22:12:31 +000062#include "cpu_features.h"
63#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON)
Noel Gordon17bbb3d2017-09-29 19:44:25 +000064#include "adler32_simd.h"
65#endif
66
initial.commit3d533e02008-07-27 00:38:33 +000067/* ========================================================================= */
mark13dc2462017-02-14 22:15:29 -080068uLong ZEXPORT adler32_z(adler, buf, len)
initial.commit3d533e02008-07-27 00:38:33 +000069 uLong adler;
70 const Bytef *buf;
mark13dc2462017-02-14 22:15:29 -080071 z_size_t len;
initial.commit3d533e02008-07-27 00:38:33 +000072{
73 unsigned long sum2;
74 unsigned n;
75
Noel Gordon17bbb3d2017-09-29 19:44:25 +000076#if defined(ADLER32_SIMD_SSSE3)
77 if (x86_cpu_enable_ssse3 && buf && len >= 64)
78 return adler32_simd_(adler, buf, len);
79#elif defined(ADLER32_SIMD_NEON)
80 if (buf && len >= 64)
81 return adler32_simd_(adler, buf, len);
82#endif
83
initial.commit3d533e02008-07-27 00:38:33 +000084 /* split Adler-32 into component sums */
85 sum2 = (adler >> 16) & 0xffff;
86 adler &= 0xffff;
87
88 /* in case user likes doing a byte at a time, keep it fast */
89 if (len == 1) {
90 adler += buf[0];
91 if (adler >= BASE)
92 adler -= BASE;
93 sum2 += adler;
94 if (sum2 >= BASE)
95 sum2 -= BASE;
96 return adler | (sum2 << 16);
97 }
98
Noel Gordon46765812018-02-07 09:33:01 +000099#if defined(ADLER32_SIMD_SSSE3)
100 /*
101 * Use SSSE3 to compute the adler32. Since this routine can be
102 * freely used, check CPU features here. zlib convention is to
103 * call adler32(0, NULL, 0), before making calls to adler32().
104 * So this is a good early (and infrequent) place to cache CPU
105 * features for those later, more interesting adler32() calls.
106 */
107 if (buf == Z_NULL) {
108 if (!len) /* Assume user is calling adler32(0, NULL, 0); */
Adenilson Cavalcanti5de00af2020-01-08 22:12:31 +0000109 cpu_check_features();
Noel Gordon46765812018-02-07 09:33:01 +0000110 return 1L;
111 }
112#else
initial.commit3d533e02008-07-27 00:38:33 +0000113 /* initial Adler-32 value (deferred check for len == 1 speed) */
114 if (buf == Z_NULL)
115 return 1L;
Noel Gordon46765812018-02-07 09:33:01 +0000116#endif
initial.commit3d533e02008-07-27 00:38:33 +0000117
118 /* in case short lengths are provided, keep it somewhat fast */
119 if (len < 16) {
120 while (len--) {
121 adler += *buf++;
122 sum2 += adler;
123 }
124 if (adler >= BASE)
125 adler -= BASE;
jiadong.zhu6c142162016-06-22 21:22:18 -0700126 MOD28(sum2); /* only added so many BASE's */
initial.commit3d533e02008-07-27 00:38:33 +0000127 return adler | (sum2 << 16);
128 }
129
130 /* do length NMAX blocks -- requires just one modulo operation */
131 while (len >= NMAX) {
132 len -= NMAX;
133 n = NMAX / 16; /* NMAX is divisible by 16 */
134 do {
135 DO16(buf); /* 16 sums unrolled */
136 buf += 16;
137 } while (--n);
138 MOD(adler);
139 MOD(sum2);
140 }
141
142 /* do remaining bytes (less than NMAX, still just one modulo) */
143 if (len) { /* avoid modulos if none remaining */
144 while (len >= 16) {
145 len -= 16;
146 DO16(buf);
147 buf += 16;
148 }
149 while (len--) {
150 adler += *buf++;
151 sum2 += adler;
152 }
153 MOD(adler);
154 MOD(sum2);
155 }
156
157 /* return recombined sums */
158 return adler | (sum2 << 16);
159}
160
161/* ========================================================================= */
mark13dc2462017-02-14 22:15:29 -0800162uLong ZEXPORT adler32(adler, buf, len)
163 uLong adler;
164 const Bytef *buf;
165 uInt len;
166{
167 return adler32_z(adler, buf, len);
168}
169
170/* ========================================================================= */
hbono@chromium.orgd2dc2092011-12-12 08:48:38 +0000171local uLong adler32_combine_(adler1, adler2, len2)
initial.commit3d533e02008-07-27 00:38:33 +0000172 uLong adler1;
173 uLong adler2;
hbono@chromium.orgd2dc2092011-12-12 08:48:38 +0000174 z_off64_t len2;
initial.commit3d533e02008-07-27 00:38:33 +0000175{
176 unsigned long sum1;
177 unsigned long sum2;
178 unsigned rem;
179
jiadong.zhu6c142162016-06-22 21:22:18 -0700180 /* for negative len, return invalid adler32 as a clue for debugging */
181 if (len2 < 0)
182 return 0xffffffffUL;
183
initial.commit3d533e02008-07-27 00:38:33 +0000184 /* the derivation of this formula is left as an exercise for the reader */
jiadong.zhu6c142162016-06-22 21:22:18 -0700185 MOD63(len2); /* assumes len2 >= 0 */
186 rem = (unsigned)len2;
initial.commit3d533e02008-07-27 00:38:33 +0000187 sum1 = adler1 & 0xffff;
188 sum2 = rem * sum1;
189 MOD(sum2);
190 sum1 += (adler2 & 0xffff) + BASE - 1;
191 sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
hbono@chromium.orgd2dc2092011-12-12 08:48:38 +0000192 if (sum1 >= BASE) sum1 -= BASE;
193 if (sum1 >= BASE) sum1 -= BASE;
mark13dc2462017-02-14 22:15:29 -0800194 if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1);
hbono@chromium.orgd2dc2092011-12-12 08:48:38 +0000195 if (sum2 >= BASE) sum2 -= BASE;
initial.commit3d533e02008-07-27 00:38:33 +0000196 return sum1 | (sum2 << 16);
197}
hbono@chromium.orgd2dc2092011-12-12 08:48:38 +0000198
199/* ========================================================================= */
200uLong ZEXPORT adler32_combine(adler1, adler2, len2)
201 uLong adler1;
202 uLong adler2;
203 z_off_t len2;
204{
205 return adler32_combine_(adler1, adler2, len2);
206}
207
208uLong ZEXPORT adler32_combine64(adler1, adler2, len2)
209 uLong adler1;
210 uLong adler2;
211 z_off64_t len2;
212{
213 return adler32_combine_(adler1, adler2, len2);
214}