Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 1 | #ifndef Py_HASH_H |
| 2 | |
| 3 | #define Py_HASH_H |
| 4 | #ifdef __cplusplus |
| 5 | extern "C" { |
| 6 | #endif |
| 7 | |
| 8 | /* Helpers for hash functions. Each returns a Py_hash_t. They are declared only when Py_LIMITED_API is not defined, i.e. they are not part of the limited API. */ |
| 9 | #ifndef Py_LIMITED_API |
| 10 | PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); /* hash of a C double */ |
| 11 | PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*); /* hash of a pointer; presumably derived from the address itself -- confirm in the implementation */ |
| 12 | PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); /* hash of a read-only buffer; the Py_ssize_t is presumably its length in bytes -- confirm */ |
| 13 | #endif |
| 14 | |
| 15 | /* Prime multiplier used in string and various other hashes. The unsigned long constant is also reused in this header as the value of _PyHASH_IMAG. */ |
| 16 | #define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */ |
| 17 | |
| 18 | /* Parameters used for the numeric hash implementation. See notes for |
| 19 | _Py_HashDouble in Objects/object.c (NOTE(review): confirm this file reference is still current). Numeric hashes are based on |
| 20 | reduction modulo the prime 2**_PyHASH_BITS - 1. The bit count is chosen from the pointer size: 61 when SIZEOF_VOID_P >= 8, otherwise 31. */ |
| 21 | |
| 22 | #if SIZEOF_VOID_P >= 8 /* pointers are at least 8 bytes wide */ |
| 23 | # define _PyHASH_BITS 61 /* modulus becomes 2**61 - 1 */ |
| 24 | #else |
| 25 | # define _PyHASH_BITS 31 /* modulus becomes 2**31 - 1 */ |
| 26 | #endif |
| 27 | |
| 28 | #define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) /* 2**_PyHASH_BITS - 1, computed in size_t */ |
| 29 | #define _PyHASH_INF 314159 /* presumably the hash value used for infinities -- confirm in the implementation */ |
| 30 | #define _PyHASH_NAN 0 /* presumably the hash value used for NaN -- confirm in the implementation */ |
| 31 | #define _PyHASH_IMAG _PyHASH_MULTIPLIER /* same value as the string-hash multiplier; presumably scales the imaginary part of complex hashes -- confirm */ |
| 32 | |
| 33 | |
| 34 | /* hash secret: a single union of at least 24 bytes that each hash algorithm views through its own member |
| 35 | * |
| 36 | * memory layout on 64 bit systems |
| 37 | * cccccccc cccccccc cccccccc uc -- unsigned char[24] |
| 38 | * pppppppp ssssssss ........ fnv -- two Py_hash_t |
| 39 | * k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T |
| 40 | * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t |
| 41 | * ........ ........ eeeeeeee pyexpat XML hash salt |
| 42 | * |
| 43 | * memory layout on 32 bit systems |
| 44 | * cccccccc cccccccc cccccccc uc |
| 45 | * ppppssss ........ ........ fnv -- two Py_hash_t |
| 46 | * k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T (*) |
| 47 | * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t |
| 48 | * ........ ........ eeee.... pyexpat XML hash salt |
| 49 | * |
| 50 | * (*) The siphash member may not be available on 32 bit platforms without |
| 51 | * an unsigned int64 data type (it is compiled in only when PY_UINT64_T is defined). |
| 52 | */ |
Martin v. Löwis | 1c0689c | 2014-01-03 21:36:49 +0100 | [diff] [blame] | 53 | #ifndef Py_LIMITED_API |
Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 54 | typedef union { |
| 55 | /* raw byte view; ensures the union is at least 24 bytes */ |
| 56 | unsigned char uc[24]; |
| 57 | /* two Py_hash_t for FNV, stored at the start of the union */ |
| 58 | struct { |
| 59 | Py_hash_t prefix; |
| 60 | Py_hash_t suffix; |
| 61 | } fnv; |
| 62 | #ifdef PY_UINT64_T |
| 63 | /* two uint64 for SipHash24 (k0, k1), occupying the first 16 bytes */ |
| 64 | struct { |
| 65 | PY_UINT64_T k0; |
| 66 | PY_UINT64_T k1; |
| 67 | } siphash; |
| 68 | #endif |
| 69 | /* a different (!) Py_hash_t for small string optimization: it sits after 16 bytes of padding, so per the layout above it does not overlap the fnv or siphash members */ |
| 70 | struct { |
| 71 | unsigned char padding[16]; |
| 72 | Py_hash_t suffix; |
| 73 | } djbx33a; |
| 74 | struct { /* pyexpat XML hash salt; same 16-byte padding, so it shares its offset with djbx33a.suffix */ |
| 75 | unsigned char padding[16]; |
| 76 | Py_hash_t hashsalt; |
| 77 | } expat; |
| 78 | } _Py_HashSecret_t; |
| 79 | PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; /* the exported secret object; declared only when Py_LIMITED_API is not defined */ |
Martin v. Löwis | 1c0689c | 2014-01-03 21:36:49 +0100 | [diff] [blame] | 80 | #endif |
Christian Heimes | 985ecdc | 2013-11-20 11:46:18 +0100 | [diff] [blame] | 81 | |
| 82 | #ifdef Py_DEBUG |
| 83 | PyAPI_DATA(int) _Py_HashSecret_Initialized; /* exported only in Py_DEBUG builds; presumably non-zero once _Py_HashSecret has been set up -- confirm in the implementation */ |
| 84 | #endif |
| 85 | |
| 86 | |
| 87 | /* hash function definition: a record describing one hash implementation. Declared only when Py_LIMITED_API is not defined. */ |
| 88 | #ifndef Py_LIMITED_API |
| 89 | typedef struct { |
| 90 | Py_hash_t (*const hash)(const void *, Py_ssize_t); /* the hash function; const pointer with the same parameter types as _Py_HashBytes */ |
| 91 | const char *name; /* name of the implementation, as a C string */ |
| 92 | const int hash_bits; /* presumably the width of the hash output in bits -- confirm */ |
| 93 | const int seed_bits; /* presumably the width of the seed/key in bits -- confirm */ |
| 94 | } PyHash_FuncDef; |
| 95 | |
| 96 | PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); /* takes no arguments and returns a pointer to a PyHash_FuncDef; presumably the definition currently in use -- confirm */ |
| 97 | #endif |
| 98 | |
| 99 | |
| 100 | /* cutoff for small string DJBX33A optimization in range [1, cutoff). |
| 101 | * |
| 102 | * About 50% of the strings in a typical Python application are smaller than |
| 103 | * 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks. |
| 104 | * NEVER use DJBX33A for long strings! |
| 105 | * |
| 106 | * A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms |
| 107 | * should use a smaller cutoff because it is easier to create colliding |
| 108 | * strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should |
| 109 | * provide a decent safety margin. |
| 110 | */ |
| 111 | #ifndef Py_HASH_CUTOFF |
| 112 | # define Py_HASH_CUTOFF 0 |
| 113 | #elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0) |
| 114 | # error Py_HASH_CUTOFF must in range 0...7. |
| 115 | #endif /* Py_HASH_CUTOFF */ |
| 116 | |
| 117 | |
| 118 | /* hash algorithm selection |
| 119 | * |
| 120 | * The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the |
| 121 | * configure script. |
| 122 | * |
| 123 | * - FNV is available on all platforms and architectures. |
| 124 | * - SIPHASH24 only works on platforms that provide PY_UINT64_T (and PY_UINT32_T) and don't |
| 125 | * require aligned memory for integers. |
| 126 | * - With EXTERNAL, embedders can provide an alternative implementation with:: |
| 127 | * |
| 128 | * PyHash_FuncDef PyHash_Func = {...}; |
| 129 | * |
| 130 | * XXX: Figure out __declspec() for extern PyHash_FuncDef. |
| 131 | */ |
| 132 | #define Py_HASH_EXTERNAL 0 |
| 133 | #define Py_HASH_SIPHASH24 1 |
| 134 | #define Py_HASH_FNV 2 |
| 135 | |
| 136 | #ifndef Py_HASH_ALGORITHM /* a value supplied by the build takes precedence over the default chosen below */ |
| 137 | # if (defined(PY_UINT64_T) && defined(PY_UINT32_T) \ |
| 138 | && !defined(HAVE_ALIGNED_REQUIRED)) |
| 139 | # define Py_HASH_ALGORITHM Py_HASH_SIPHASH24 |
| 140 | # else |
| 141 | # define Py_HASH_ALGORITHM Py_HASH_FNV |
| 142 | # endif /* PY_UINT64_T && PY_UINT32_T && !HAVE_ALIGNED_REQUIRED */ |
| 143 | #endif /* Py_HASH_ALGORITHM */ |
| 144 | |
| 145 | #ifdef __cplusplus |
| 146 | } |
| 147 | #endif |
| 148 | |
| 149 | #endif /* !Py_HASH_H */ |