Chih-Hung Hsieh | adb9edf | 2020-04-07 14:24:02 -0700 | [diff] [blame] | 1 | // Copyright 2015 The Rust Project Developers. See the COPYRIGHT |
| 2 | // file at the top-level directory of this distribution and at |
| 3 | // http://rust-lang.org/COPYRIGHT. |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 8 | // option. This file may not be copied, modified, or distributed |
| 9 | // except according to those terms. |
| 10 | |
| 11 | //! Fast, non-cryptographic hash used by rustc and Firefox. |
| 12 | //! |
| 13 | //! # Example |
| 14 | //! |
| 15 | //! ```rust |
| 16 | //! # #[cfg(feature = "std")] |
| 17 | //! # fn main() { |
| 18 | //! use rustc_hash::FxHashMap; |
| 19 | //! let mut map: FxHashMap<u32, u32> = FxHashMap::default(); |
| 20 | //! map.insert(22, 44); |
| 21 | //! # } |
| 22 | //! # #[cfg(not(feature = "std"))] |
| 23 | //! # fn main() { } |
| 24 | //! ``` |
| 25 | |
| 26 | #![no_std] |
| 27 | |
| 28 | #[cfg(feature = "std")] |
| 29 | extern crate std; |
| 30 | |
| 31 | use core::convert::TryInto; |
| 32 | use core::default::Default; |
| 33 | #[cfg(feature = "std")] |
| 34 | use core::hash::BuildHasherDefault; |
| 35 | use core::hash::Hasher; |
| 36 | use core::mem::size_of; |
| 37 | use core::ops::BitXor; |
| 38 | #[cfg(feature = "std")] |
| 39 | use std::collections::{HashMap, HashSet}; |
| 40 | |
| 41 | /// Type alias for a hashmap using the `fx` hash algorithm. |
| 42 | #[cfg(feature = "std")] |
| 43 | pub type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>; |
| 44 | |
| 45 | /// Type alias for a hashmap using the `fx` hash algorithm. |
| 46 | #[cfg(feature = "std")] |
| 47 | pub type FxHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>; |
| 48 | |
| 49 | /// A speedy hash algorithm for use within rustc. The hashmap in liballoc |
| 50 | /// by default uses SipHash which isn't quite as speedy as we want. In the |
| 51 | /// compiler we're not really worried about DOS attempts, so we use a fast |
| 52 | /// non-cryptographic hash. |
| 53 | /// |
| 54 | /// This is the same as the algorithm used by Firefox -- which is a homespun |
| 55 | /// one not based on any widely-known algorithm -- though modified to produce |
| 56 | /// 64-bit hash values instead of 32-bit hash values. It consistently |
| 57 | /// out-performs an FNV-based hash within rustc itself -- the collision rate is |
| 58 | /// similar or slightly worse than FNV, but the speed of the hash function |
| 59 | /// itself is much higher because it works on up to 8 bytes at a time. |
| 60 | pub struct FxHasher { |
| 61 | hash: usize, |
| 62 | } |
| 63 | |
| 64 | #[cfg(target_pointer_width = "32")] |
| 65 | const K: usize = 0x9e3779b9; |
| 66 | #[cfg(target_pointer_width = "64")] |
| 67 | const K: usize = 0x517cc1b727220a95; |
| 68 | |
| 69 | impl Default for FxHasher { |
| 70 | #[inline] |
| 71 | fn default() -> FxHasher { |
| 72 | FxHasher { hash: 0 } |
| 73 | } |
| 74 | } |
| 75 | |
| 76 | impl FxHasher { |
| 77 | #[inline] |
| 78 | fn add_to_hash(&mut self, i: usize) { |
| 79 | self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K); |
| 80 | } |
| 81 | } |
| 82 | |
| 83 | impl Hasher for FxHasher { |
| 84 | #[inline] |
| 85 | fn write(&mut self, mut bytes: &[u8]) { |
| 86 | #[cfg(target_pointer_width = "32")] |
| 87 | let read_usize = |bytes: &[u8]| u32::from_ne_bytes(bytes[..4].try_into().unwrap()); |
| 88 | #[cfg(target_pointer_width = "64")] |
| 89 | let read_usize = |bytes: &[u8]| u64::from_ne_bytes(bytes[..8].try_into().unwrap()); |
| 90 | |
| 91 | let mut hash = FxHasher { hash: self.hash }; |
| 92 | assert!(size_of::<usize>() <= 8); |
| 93 | while bytes.len() >= size_of::<usize>() { |
| 94 | hash.add_to_hash(read_usize(bytes) as usize); |
| 95 | bytes = &bytes[size_of::<usize>()..]; |
| 96 | } |
| 97 | if (size_of::<usize>() > 4) && (bytes.len() >= 4) { |
| 98 | hash.add_to_hash(u32::from_ne_bytes(bytes[..4].try_into().unwrap()) as usize); |
| 99 | bytes = &bytes[4..]; |
| 100 | } |
| 101 | if (size_of::<usize>() > 2) && bytes.len() >= 2 { |
| 102 | hash.add_to_hash(u16::from_ne_bytes(bytes[..2].try_into().unwrap()) as usize); |
| 103 | bytes = &bytes[2..]; |
| 104 | } |
| 105 | if (size_of::<usize>() > 1) && bytes.len() >= 1 { |
| 106 | hash.add_to_hash(bytes[0] as usize); |
| 107 | } |
| 108 | self.hash = hash.hash; |
| 109 | } |
| 110 | |
| 111 | #[inline] |
| 112 | fn write_u8(&mut self, i: u8) { |
| 113 | self.add_to_hash(i as usize); |
| 114 | } |
| 115 | |
| 116 | #[inline] |
| 117 | fn write_u16(&mut self, i: u16) { |
| 118 | self.add_to_hash(i as usize); |
| 119 | } |
| 120 | |
| 121 | #[inline] |
| 122 | fn write_u32(&mut self, i: u32) { |
| 123 | self.add_to_hash(i as usize); |
| 124 | } |
| 125 | |
| 126 | #[cfg(target_pointer_width = "32")] |
| 127 | #[inline] |
| 128 | fn write_u64(&mut self, i: u64) { |
| 129 | self.add_to_hash(i as usize); |
| 130 | self.add_to_hash((i >> 32) as usize); |
| 131 | } |
| 132 | |
| 133 | #[cfg(target_pointer_width = "64")] |
| 134 | #[inline] |
| 135 | fn write_u64(&mut self, i: u64) { |
| 136 | self.add_to_hash(i as usize); |
| 137 | } |
| 138 | |
| 139 | #[inline] |
| 140 | fn write_usize(&mut self, i: usize) { |
| 141 | self.add_to_hash(i); |
| 142 | } |
| 143 | |
| 144 | #[inline] |
| 145 | fn finish(&self) -> u64 { |
| 146 | self.hash as u64 |
| 147 | } |
| 148 | } |