blob: d6548716b9610a95c4459c9a7a741fce97ac6a9e [file] [log] [blame]
/*
   xxHash - Extremely Fast Hash algorithm
   Header File
   Copyright (C) 2012-2016, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
   - xxHash source repository : https://github.com/Cyan4973/xxHash
*/
34
/* Notice extracted from xxHash homepage :

xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
It also successfully passes all tests from the SMHasher suite.

Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)

Name            Speed       Q.Score   Author
xxHash          5.4 GB/s     10
CrapWow         3.2 GB/s      2       Andrew
MurmurHash 3a   2.7 GB/s     10       Austin Appleby
SpookyHash      2.0 GB/s     10       Bob Jenkins
SBox            1.4 GB/s      9       Bret Mulvey
Lookup3         1.2 GB/s      9       Bob Jenkins
SuperFastHash   1.2 GB/s      1       Paul Hsieh
CityHash64      1.05 GB/s    10       Pike & Alakuijala
FNV             0.55 GB/s     5       Fowler, Noll, Vo
CRC32           0.43 GB/s     9
MD5-32          0.33 GB/s    10       Ronald L. Rivest
SHA1-32         0.28 GB/s    10

Q.Score is a measure of quality of the hash function.
It depends on successfully passing SMHasher test set.
10 is a perfect score.

A 64-bits version, named XXH64, is available since r35.
It offers much better speed, but for 64-bits applications only.

Name     Speed on 64 bits    Speed on 32 bits
XXH64       13.8 GB/s            1.9 GB/s
XXH32        6.8 GB/s            6.0 GB/s
*/
66
Yann Collet6c903a82016-05-28 13:34:07 +020067#ifndef XXHASH_H_5627135585666179
68#define XXHASH_H_5627135585666179 1
Yann Collet4856a002015-01-24 01:58:16 +010069
70#if defined (__cplusplus)
71extern "C" {
72#endif
73
74
/* ****************************
*  Definitions
******************************/
#include <stddef.h>   /* size_t */

/*! XXH_errorcode :
*   Status code returned by the state-management and streaming functions
*   declared in this header (XXHnn_freeState, XXHnn_reset, XXHnn_update).
*   XXH_OK is 0; any other value (XXH_ERROR) signals an error. */
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
80
81
/* ****************************
*  API modifier
******************************/
/*!XXH_PRIVATE_API
*  Transforms all public symbols within `xxhash.c` into private ones.
*  Methodology :
*  instead of : #include "xxhash.h"
*  do :
*     #define XXH_PRIVATE_API
*     #include "xxhash.c"   // note the .c , instead of .h
*  also : don't compile and link xxhash.c separately
*/
/* XXH_PUBLIC_API is the storage-class prefix placed in front of every public
 * declaration of this header. It is empty by default (external linkage) and
 * becomes some flavour of `static` when XXH_PRIVATE_API is defined. */
#ifdef XXH_PRIVATE_API
#  if defined(__GNUC__)
     /* the attribute silences "defined but not used" warnings */
#    define XXH_PUBLIC_API static __attribute__((unused))
#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#    define XXH_PUBLIC_API static inline
#  elif defined(_MSC_VER)
#    define XXH_PUBLIC_API static __inline
#  else
#    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
#  endif
#else
#  define XXH_PUBLIC_API   /* do nothing */
#endif
Yann Collet14c6d0d2015-07-04 18:14:14 -0800107
/*!XXH_NAMESPACE, aka Namespace Emulation :

If you want to include _and expose_ xxHash functions from within your own library,
but also want to avoid symbol collisions with another library which also includes xxHash,

you can use XXH_NAMESPACE, to automatically prefix any public symbol from `xxhash.c`
with the value of XXH_NAMESPACE (so do not leave it empty, and avoid numeric values).

Note that no change is required within the calling program as long as it also includes `xxhash.h` :
regular symbol names will be automatically translated by this header.
*/
#ifdef XXH_NAMESPACE
   /* Two-level concatenation : XXH_NAME2 lets its arguments be macro-expanded first
    * (so XXH_NAMESPACE is replaced by its value), then XXH_CAT pastes the tokens. */
#  define XXH_CAT(A,B) A##B
#  define XXH_NAME2(A,B) XXH_CAT(A,B)
#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
   /* The canonical-representation functions are public symbols of this header too :
    * without these entries they would keep their un-prefixed names and could still collide. */
#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
#endif
136
Yann Collet4856a002015-01-24 01:58:16 +0100137
Yann Collet5835e1b2016-01-05 01:44:36 +0100138/* *************************************
139* Version
140***************************************/
Yann Collet8ab94b62016-01-06 01:37:46 +0100141#define XXH_VERSION_MAJOR 0
Yann Collet6c903a82016-05-28 13:34:07 +0200142#define XXH_VERSION_MINOR 6
Yann Collet8ab94b62016-01-06 01:37:46 +0100143#define XXH_VERSION_RELEASE 0
Yann Collet5835e1b2016-01-05 01:44:36 +0100144#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
145XXH_PUBLIC_API unsigned XXH_versionNumber (void);
146
147
148/* ****************************
Yann Collet14c6d0d2015-07-04 18:14:14 -0800149* Simple Hash Functions
Yann Collet5835e1b2016-01-05 01:44:36 +0100150******************************/
Yann Collet6c903a82016-05-28 13:34:07 +0200151typedef unsigned int XXH32_hash_t;
152typedef unsigned long long XXH64_hash_t;
Yann Collet4856a002015-01-24 01:58:16 +0100153
Yann Collet6c903a82016-05-28 13:34:07 +0200154XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
155XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
Yann Collet4856a002015-01-24 01:58:16 +0100156
Yann Collet5835e1b2016-01-05 01:44:36 +0100157/*!
Yann Collet4856a002015-01-24 01:58:16 +0100158XXH32() :
159 Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
160 The memory between input & input+length must be valid (allocated and read-accessible).
161 "seed" can be used to alter the result predictably.
Yann Collet4856a002015-01-24 01:58:16 +0100162 Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
163XXH64() :
164 Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
Yann Collet5835e1b2016-01-05 01:44:36 +0100165 "seed" can be used to alter the result predictably.
166 This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
Yann Collet4856a002015-01-24 01:58:16 +0100167*/
168
169
Yann Collet5835e1b2016-01-05 01:44:36 +0100170/* ****************************
Yann Collet6c903a82016-05-28 13:34:07 +0200171* Streaming Hash Functions
Yann Collet5835e1b2016-01-05 01:44:36 +0100172******************************/
Yann Collet6c903a82016-05-28 13:34:07 +0200173typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
174typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
Yann Collet4856a002015-01-24 01:58:16 +0100175
Yann Collet6c903a82016-05-28 13:34:07 +0200176/*! Dynamic allocation of states
177 Compatible with dynamic libraries */
Yann Collet5835e1b2016-01-05 01:44:36 +0100178
179XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
180XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
181
182XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
183XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
184
185
186/* hash streaming */
187
188XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
189XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
Yann Collet6c903a82016-05-28 13:34:07 +0200190XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
Yann Collet5835e1b2016-01-05 01:44:36 +0100191
Yann Collet6c903a82016-05-28 13:34:07 +0200192XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
193XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
194XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
Yann Collet5835e1b2016-01-05 01:44:36 +0100195
196/*!
Yann Collet8ab94b62016-01-06 01:37:46 +0100197These functions generate the xxHash of an input provided in multiple segments,
Yann Collet5835e1b2016-01-05 01:44:36 +0100198as opposed to provided as a single block.
Yann Collet4856a002015-01-24 01:58:16 +0100199
Yann Collet8ab94b62016-01-06 01:37:46 +0100200XXH state must first be allocated, using either static or dynamic method provided above.
Yann Collet4856a002015-01-24 01:58:16 +0100201
202Start a new hash by initializing state with a seed, using XXHnn_reset().
203
204Then, feed the hash state by calling XXHnn_update() as many times as necessary.
Yann Collet8ab94b62016-01-06 01:37:46 +0100205Obviously, input must be valid, hence allocated and read accessible.
Yann Collet4856a002015-01-24 01:58:16 +0100206The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
207
Yann Collet5835e1b2016-01-05 01:44:36 +0100208Finally, a hash value can be produced anytime, by using XXHnn_digest().
Yann Collet6c903a82016-05-28 13:34:07 +0200209This function returns the nn-bits hash as an int or long long.
210
211It's still possible to continue inserting input into the hash state after a digest,
Yann Collet5835e1b2016-01-05 01:44:36 +0100212and later on generate some new hashes, by calling again XXHnn_digest().
Yann Collet4856a002015-01-24 01:58:16 +0100213
Yann Collet8ab94b62016-01-06 01:37:46 +0100214When done, free XXH state space if it was allocated dynamically.
Yann Collet4856a002015-01-24 01:58:16 +0100215*/
216
217
Yann Collet6c903a82016-05-28 13:34:07 +0200218/* **************************
219* Canonical representation
220****************************/
221typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
222typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
223
224XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
225XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
226
227XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
228XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
229
230/*! Default result type for XXH functions are primitive unsigned 32 and 64 bits.
231* The canonical representation uses human-readable write convention, aka big-endian (large digits first).
232* These functions allow transformation of hash result into and from its canonical format.
233* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
234*/
235
236
#ifdef XXH_STATIC_LINKING_ONLY

/* This part contains definitions which shall only be used with static linking.
   The prototypes / types defined here are not guaranteed to remain stable.
   They could change in a future version, becoming incompatible with a different version of the library */

   /* Field roles below are inferred from their names ; xxhash.c is the reference - TODO confirm. */
   struct XXH32_state_s {
       unsigned long long total_len;   /* presumably : total input length consumed so far */
       unsigned seed;                  /* presumably : seed given to XXH32_reset() */
       unsigned v1;                    /* v1..v4 : presumably the four running accumulators */
       unsigned v2;
       unsigned v3;
       unsigned v4;
       unsigned mem32[4];   /* buffer defined as U32 for alignment */
       unsigned memsize;               /* presumably : amount of data currently held in mem32 */
   };   /* typedef'd to XXH32_state_t */

   struct XXH64_state_s {
       unsigned long long total_len;   /* presumably : total input length consumed so far */
       unsigned long long seed;        /* presumably : seed given to XXH64_reset() */
       unsigned long long v1;          /* v1..v4 : presumably the four running accumulators */
       unsigned long long v2;
       unsigned long long v3;
       unsigned long long v4;
       unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
       unsigned memsize;               /* presumably : amount of data currently held in mem64 */
   };   /* typedef'd to XXH64_state_t */


#endif
267
268
Yann Collet4856a002015-01-24 01:58:16 +0100269#if defined (__cplusplus)
270}
271#endif
Yann Collet6c903a82016-05-28 13:34:07 +0200272
273#endif /* XXHASH_H_5627135585666179 */