Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 1 | #ifndef Py_HASH_H |
| 2 | |
| 3 | #define Py_HASH_H |
| 4 | #ifdef __cplusplus |
| 5 | extern "C" { |
| 6 | #endif |
| 7 | |
| 8 | /* Helpers for hash functions */ |
| 9 | #ifndef Py_LIMITED_API |
| 10 | PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); |
Andy Lester | 3d06953 | 2020-02-05 15:09:57 -0600 | [diff] [blame] | 11 | PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*); |
Victor Stinner | f453221 | 2020-05-12 18:46:20 +0200 | [diff] [blame] | 12 | // Similar to _Py_HashPointer(), but don't replace -1 with -2 |
| 13 | PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*); |
Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 14 | PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); |
| 15 | #endif |
| 16 | |
/* Prime multiplier used in string and various other hashes. */
#define _PyHASH_MULTIPLIER 1000003UL  /* 0xf4243 */

/* Parameters used for the numeric hash implementation.  See notes for
   _Py_HashDouble in Python/pyhash.c.  Numeric hashes are based on
   reduction modulo the prime 2**_PyHASH_BITS - 1. */

/* 61 bits when pointers are at least 8 bytes wide, 31 bits otherwise. */
#if SIZEOF_VOID_P >= 8
#  define _PyHASH_BITS 61
#else
#  define _PyHASH_BITS 31
#endif

/* The modulus itself: 2**_PyHASH_BITS - 1, computed in size_t. */
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
/* Presumably the hash values used for infinities and NaNs, and the
   multiplier applied to the imaginary part of complex numbers -- confirm
   against the numeric hash implementation. */
#define _PyHASH_INF 314159
#define _PyHASH_NAN 0
#define _PyHASH_IMAG _PyHASH_MULTIPLIER
| 34 | |
| 35 | |
| 36 | /* hash secret |
| 37 | * |
| 38 | * memory layout on 64 bit systems |
| 39 | * cccccccc cccccccc cccccccc uc -- unsigned char[24] |
| 40 | * pppppppp ssssssss ........ fnv -- two Py_hash_t |
Benjamin Peterson | 9b3d770 | 2016-09-06 13:24:00 -0700 | [diff] [blame] | 41 | * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t |
Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 42 | * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t |
| 43 | * ........ ........ eeeeeeee pyexpat XML hash salt |
| 44 | * |
| 45 | * memory layout on 32 bit systems |
| 46 | * cccccccc cccccccc cccccccc uc |
| 47 | * ppppssss ........ ........ fnv -- two Py_hash_t |
Benjamin Peterson | 9b3d770 | 2016-09-06 13:24:00 -0700 | [diff] [blame] | 48 | * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t (*) |
Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 49 | * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t |
| 50 | * ........ ........ eeee.... pyexpat XML hash salt |
| 51 | * |
| 52 | * (*) The siphash member may not be available on 32 bit platforms without |
| 53 | * an unsigned int64 data type. |
| 54 | */ |
Martin v. Löwis | 1c0689c | 2014-01-03 21:36:49 +0100 | [diff] [blame] | 55 | #ifndef Py_LIMITED_API |
Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 56 | typedef union { |
| 57 | /* ensure 24 bytes */ |
| 58 | unsigned char uc[24]; |
| 59 | /* two Py_hash_t for FNV */ |
| 60 | struct { |
| 61 | Py_hash_t prefix; |
| 62 | Py_hash_t suffix; |
| 63 | } fnv; |
Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 64 | /* two uint64 for SipHash24 */ |
| 65 | struct { |
Benjamin Peterson | 9b3d770 | 2016-09-06 13:24:00 -0700 | [diff] [blame] | 66 | uint64_t k0; |
| 67 | uint64_t k1; |
Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 68 | } siphash; |
Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 69 | /* a different (!) Py_hash_t for small string optimization */ |
| 70 | struct { |
| 71 | unsigned char padding[16]; |
| 72 | Py_hash_t suffix; |
| 73 | } djbx33a; |
| 74 | struct { |
| 75 | unsigned char padding[16]; |
| 76 | Py_hash_t hashsalt; |
| 77 | } expat; |
| 78 | } _Py_HashSecret_t; |
| 79 | PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; |
Martin v. Löwis | 1c0689c | 2014-01-03 21:36:49 +0100 | [diff] [blame] | 80 | #endif |
Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 81 | |
#ifdef Py_DEBUG
/* Declared only in Py_DEBUG builds.  Presumably a flag recording whether
   the hash secret has been initialized -- confirm against its definition. */
PyAPI_DATA(int) _Py_HashSecret_Initialized;
#endif
| 85 | |
| 86 | |
| 87 | /* hash function definition */ |
| 88 | #ifndef Py_LIMITED_API |
| 89 | typedef struct { |
| 90 | Py_hash_t (*const hash)(const void *, Py_ssize_t); |
| 91 | const char *name; |
| 92 | const int hash_bits; |
| 93 | const int seed_bits; |
| 94 | } PyHash_FuncDef; |
| 95 | |
| 96 | PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); |
| 97 | #endif |
| 98 | |
| 99 | |
/* cutoff for small string DJBX33A optimization in range [1, cutoff).
 *
 * About 50% of the strings in a typical Python application are smaller than
 * 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks.
 * NEVER use DJBX33A for long strings!
 *
 * A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms
 * should use a smaller cutoff because it is easier to create colliding
 * strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should
 * provide a decent safety margin.
 *
 * Defaults to 0 (optimization disabled) unless the build defines it; any
 * predefined value outside 0...7 is rejected at compile time.
 */
#ifndef Py_HASH_CUTOFF
#  define Py_HASH_CUTOFF 0
#elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0)
#  error Py_HASH_CUTOFF must be in range 0...7.
#endif /* Py_HASH_CUTOFF */
| 116 | |
| 117 | |
/* hash algorithm selection
 *
 * The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the
 * configure script.
 *
 * - FNV is available on all platforms and architectures.
 * - SIPHASH24 only works on platforms that don't require aligned memory for
 *   integers.
 * - With EXTERNAL, embedders can provide an alternative implementation with::
 *
 *     PyHash_FuncDef PyHash_Func = {...};
 *
 * XXX: Figure out __declspec() for extern PyHash_FuncDef.
 */
#define Py_HASH_EXTERNAL 0
#define Py_HASH_SIPHASH24 1
#define Py_HASH_FNV 2

/* Unless the build already chose an algorithm, default to SipHash24 and
   fall back to FNV when HAVE_ALIGNED_REQUIRED is defined. */
#ifndef Py_HASH_ALGORITHM
#  ifndef HAVE_ALIGNED_REQUIRED
#    define Py_HASH_ALGORITHM Py_HASH_SIPHASH24
#  else
#    define Py_HASH_ALGORITHM Py_HASH_FNV
#  endif /* !HAVE_ALIGNED_REQUIRED */
#endif /* Py_HASH_ALGORITHM */
| 142 | |
| 143 | #ifdef __cplusplus |
| 144 | } |
| 145 | #endif |
| 146 | |
| 147 | #endif /* !Py_HASH_H */ |