#ifndef Py_HASH_H

#define Py_HASH_H
#ifdef __cplusplus
extern "C" {
#endif

8/* Helpers for hash functions */
9#ifndef Py_LIMITED_API
10PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
Andy Lester3d069532020-02-05 15:09:57 -060011PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*);
Victor Stinnerf4532212020-05-12 18:46:20 +020012// Similar to _Py_HashPointer(), but don't replace -1 with -2
13PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*);
Christian Heimes985ecdc2013-11-20 11:46:18 +010014PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
15#endif
16
/* Prime multiplier used in string and various other hashes. */
#define _PyHASH_MULTIPLIER 1000003UL  /* 0xf4243 */

/* Parameters used for the numeric hash implementation.  See notes for
   _Py_HashDouble in Python/pyhash.c.  Numeric hashes are based on
   reduction modulo the prime 2**_PyHASH_BITS - 1. */

/* Pick the exponent from the pointer width.  If SIZEOF_VOID_P is not
   defined, the preprocessor evaluates it as 0 and the 31-bit branch is
   taken. */
#if SIZEOF_VOID_P >= 8
#  define _PyHASH_BITS 61
#else
#  define _PyHASH_BITS 31
#endif

/* The modulus 2**_PyHASH_BITS - 1, computed in size_t. */
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
/* Fixed constants; going by their names these are the hash values used for
   infinities and NaN -- confirm against Python/pyhash.c. */
#define _PyHASH_INF 314159
#define _PyHASH_NAN 0
/* Same value as _PyHASH_MULTIPLIER; the name suggests it is applied to the
   imaginary part of complex numbers -- confirm at the use site. */
#define _PyHASH_IMAG _PyHASH_MULTIPLIER


36/* hash secret
37 *
38 * memory layout on 64 bit systems
39 * cccccccc cccccccc cccccccc uc -- unsigned char[24]
40 * pppppppp ssssssss ........ fnv -- two Py_hash_t
Benjamin Peterson9b3d7702016-09-06 13:24:00 -070041 * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t
Christian Heimes985ecdc2013-11-20 11:46:18 +010042 * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t
43 * ........ ........ eeeeeeee pyexpat XML hash salt
44 *
45 * memory layout on 32 bit systems
46 * cccccccc cccccccc cccccccc uc
47 * ppppssss ........ ........ fnv -- two Py_hash_t
Benjamin Peterson9b3d7702016-09-06 13:24:00 -070048 * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t (*)
Christian Heimes985ecdc2013-11-20 11:46:18 +010049 * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t
50 * ........ ........ eeee.... pyexpat XML hash salt
51 *
52 * (*) The siphash member may not be available on 32 bit platforms without
53 * an unsigned int64 data type.
54 */
Martin v. Löwis1c0689c2014-01-03 21:36:49 +010055#ifndef Py_LIMITED_API
Christian Heimes985ecdc2013-11-20 11:46:18 +010056typedef union {
57 /* ensure 24 bytes */
58 unsigned char uc[24];
59 /* two Py_hash_t for FNV */
60 struct {
61 Py_hash_t prefix;
62 Py_hash_t suffix;
63 } fnv;
Christian Heimes985ecdc2013-11-20 11:46:18 +010064 /* two uint64 for SipHash24 */
65 struct {
Benjamin Peterson9b3d7702016-09-06 13:24:00 -070066 uint64_t k0;
67 uint64_t k1;
Christian Heimes985ecdc2013-11-20 11:46:18 +010068 } siphash;
Christian Heimes985ecdc2013-11-20 11:46:18 +010069 /* a different (!) Py_hash_t for small string optimization */
70 struct {
71 unsigned char padding[16];
72 Py_hash_t suffix;
73 } djbx33a;
74 struct {
75 unsigned char padding[16];
76 Py_hash_t hashsalt;
77 } expat;
78} _Py_HashSecret_t;
79PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
Martin v. Löwis1c0689c2014-01-03 21:36:49 +010080#endif
Christian Heimes985ecdc2013-11-20 11:46:18 +010081
/* Declared only in Py_DEBUG builds.  Judging by its name, this flag records
   whether _Py_HashSecret has been initialized -- confirm at the definition. */
#ifdef Py_DEBUG
PyAPI_DATA(int) _Py_HashSecret_Initialized;
#endif


87/* hash function definition */
88#ifndef Py_LIMITED_API
89typedef struct {
90 Py_hash_t (*const hash)(const void *, Py_ssize_t);
91 const char *name;
92 const int hash_bits;
93 const int seed_bits;
94} PyHash_FuncDef;
95
96PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
97#endif
98
99
/* cutoff for small string DJBX33A optimization in range [1, cutoff).
 *
 * About 50% of the strings in a typical Python application are smaller than
 * 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks.
 * NEVER use DJBX33A for long strings!
 *
 * A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms
 * should use a smaller cutoff because it is easier to create colliding
 * strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should
 * provide a decent safety margin.
 *
 * When the build does not define Py_HASH_CUTOFF it defaults to 0 (disabled).
 * A build-supplied value outside 0...7 is rejected at compile time.
 */
#ifndef Py_HASH_CUTOFF
#  define Py_HASH_CUTOFF 0
#elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0)
#  error Py_HASH_CUTOFF must be in range 0...7.
#endif /* Py_HASH_CUTOFF */


/* hash algorithm selection
 *
 * The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the
 * configure script.
 *
 * - FNV is available on all platforms and architectures.
 * - SIPHASH24 only works on platforms that don't require aligned memory for
 *   integers.
 * - With EXTERNAL, embedders can provide an alternative implementation with::
 *
 *     PyHash_FuncDef PyHash_Func = {...};
 *
 * XXX: Figure out __declspec() for extern PyHash_FuncDef.
 */
#define Py_HASH_EXTERNAL 0
#define Py_HASH_SIPHASH24 1
#define Py_HASH_FNV 2

/* Default when the build did not choose an algorithm: SipHash24, unless
   HAVE_ALIGNED_REQUIRED is defined, in which case fall back to FNV. */
#ifndef Py_HASH_ALGORITHM
#  ifndef HAVE_ALIGNED_REQUIRED
#    define Py_HASH_ALGORITHM Py_HASH_SIPHASH24
#  else
#    define Py_HASH_ALGORITHM Py_HASH_FNV
#  endif /* !HAVE_ALIGNED_REQUIRED */
#endif /* Py_HASH_ALGORITHM */

#ifdef __cplusplus
}
#endif

#endif /* !Py_HASH_H */